From 0509b26ce18b1fc59b9f85f782220d6dd54cd765 Mon Sep 17 00:00:00 2001 From: D-Walther <180276392+D-Walther@users.noreply.github.com> Date: Thu, 4 Sep 2025 15:00:59 +0200 Subject: [PATCH 1/2] Add failing test --- tests/test_compiler/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_compiler/test_util.py b/tests/test_compiler/test_util.py index a9c89be..2612435 100644 --- a/tests/test_compiler/test_util.py +++ b/tests/test_compiler/test_util.py @@ -133,6 +133,7 @@ def test_snake_case() -> None: assert snake_case("SnakeCase") == "snake_case" assert snake_case("snakeCase") == "snake_case" assert snake_case("SNAKE_CASE") == "snake_case" + assert snake_case("SNAKE_42_CASE") == "snake_42_case" def test_cast_or_raise() -> None: From c9706689bc3f9091270e8dea18bcdf7dab89dad2 Mon Sep 17 00:00:00 2001 From: D-Walther <180276392+D-Walther@users.noreply.github.com> Date: Thu, 4 Sep 2025 15:41:17 +0200 Subject: [PATCH 2/2] Fix: numbers in uppercase are merged with previous word. Refactor. - E.g. 
MY_123_ENUM was previously converted to my123_enum instead of my_123_enum - Refactor to make it easier to debug the intermediate steps --- compiler/bitproto/utils.py | 40 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/compiler/bitproto/utils.py b/compiler/bitproto/utils.py index 8dfd5df..cf2c38a 100644 --- a/compiler/bitproto/utils.py +++ b/compiler/bitproto/utils.py @@ -1,3 +1,4 @@ +import itertools import os import re import sys @@ -360,14 +361,8 @@ def pascal_case(word: str) -> str: return "".join(items) -_snake_case_regex_head = r"[A-Z0-9]" -_snake_case_regex_tail = r"[^A-Z0-9]" -_snake_case_regex_capital_match = re.compile( - rf"({_snake_case_regex_head}+{_snake_case_regex_tail}*)" -) -_snake_case_regex_m_capital_match = re.compile( - rf"^({_snake_case_regex_head}{{1,}})({_snake_case_regex_head}+{_snake_case_regex_tail}+)$" -) +# Uppercase preceded by a lowercase marks the start of a new camelCase word +_snake_case_regex_camel_match = re.compile(r"(?<=[a-z])([A-Z]+[a-z0-9]*)") def snake_case(word: str) -> str: @@ -376,23 +371,12 @@ def snake_case(word: str) -> str: >>> snake_case("someWord") "some_word" """ - underscore = "_" - no_underscore_words = word.split(underscore) - no_underscore_cases: List[str] = [] - - for w in no_underscore_words: - cases = filter(None, _snake_case_regex_capital_match.split(w)) - for case in cases: - subcases = filter(None, _snake_case_regex_m_capital_match.split(case)) - if subcases: - for subcase in subcases: - no_underscore_cases.append(subcase) - else: - no_underscore_cases.append(case) - - snake_word = "" - for case in no_underscore_cases: - if not case.isdigit(): - snake_word += underscore - snake_word += case - return snake_word.strip(underscore).lower() + snake_case_split: List[str] = word.split("_") + + camel_case_split: List[str] = list( + itertools.chain.from_iterable( + filter(None, _snake_case_regex_camel_match.split(w)) + for w in snake_case_split + ) + ) + return "_".join(camel_case_split).lower()