From 0509b26ce18b1fc59b9f85f782220d6dd54cd765 Mon Sep 17 00:00:00 2001 From: D-Walther <180276392+D-Walther@users.noreply.github.com> Date: Thu, 4 Sep 2025 15:00:59 +0200 Subject: [PATCH 1/5] Add failing test --- tests/test_compiler/test_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_compiler/test_util.py b/tests/test_compiler/test_util.py index a9c89be..2612435 100644 --- a/tests/test_compiler/test_util.py +++ b/tests/test_compiler/test_util.py @@ -133,6 +133,7 @@ def test_snake_case() -> None: assert snake_case("SnakeCase") == "snake_case" assert snake_case("snakeCase") == "snake_case" assert snake_case("SNAKE_CASE") == "snake_case" + assert snake_case("SNAKE_42_CASE") == "snake_42_case" def test_cast_or_raise() -> None: From c9706689bc3f9091270e8dea18bcdf7dab89dad2 Mon Sep 17 00:00:00 2001 From: D-Walther <180276392+D-Walther@users.noreply.github.com> Date: Thu, 4 Sep 2025 15:41:17 +0200 Subject: [PATCH 2/5] Fix: numbers in uppercase are merged with previous word. Refactor. - E.g. 
MY_123_ENUM gets split into my123_enum - Refactor to make it easier to debug the intermediate steps --- compiler/bitproto/utils.py | 40 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/compiler/bitproto/utils.py b/compiler/bitproto/utils.py index 8dfd5df..cf2c38a 100644 --- a/compiler/bitproto/utils.py +++ b/compiler/bitproto/utils.py @@ -1,3 +1,4 @@ +import itertools import os import re import sys @@ -360,14 +361,8 @@ def pascal_case(word: str) -> str: return "".join(items) -_snake_case_regex_head = r"[A-Z0-9]" -_snake_case_regex_tail = r"[^A-Z0-9]" -_snake_case_regex_capital_match = re.compile( - rf"({_snake_case_regex_head}+{_snake_case_regex_tail}*)" -) -_snake_case_regex_m_capital_match = re.compile( - rf"^({_snake_case_regex_head}{{1,}})({_snake_case_regex_head}+{_snake_case_regex_tail}+)$" -) +# Uppercase preceded by a lowercase marks the start of a new camelCase word +_snake_case_regex_camel_match = re.compile(r"(?<=[a-z])([A-Z]+[a-z0-9]*)") def snake_case(word: str) -> str: @@ -376,23 +371,12 @@ def snake_case(word: str) -> str: >>> snake_case("someWord") "some_word" """ - underscore = "_" - no_underscore_words = word.split(underscore) - no_underscore_cases: List[str] = [] - - for w in no_underscore_words: - cases = filter(None, _snake_case_regex_capital_match.split(w)) - for case in cases: - subcases = filter(None, _snake_case_regex_m_capital_match.split(case)) - if subcases: - for subcase in subcases: - no_underscore_cases.append(subcase) - else: - no_underscore_cases.append(case) - - snake_word = "" - for case in no_underscore_cases: - if not case.isdigit(): - snake_word += underscore - snake_word += case - return snake_word.strip(underscore).lower() + snake_case_split: List[str] = word.split("_") + + camel_case_split: List[str] = list( + itertools.chain.from_iterable( + filter(None, _snake_case_regex_camel_match.split(w)) + for w in snake_case_split + ) + ) + return "_".join(camel_case_split).lower() 
From 11444126759f13ad5307a41b67773669aa5eb60f Mon Sep 17 00:00:00 2001 From: hit9 Date: Sat, 20 Sep 2025 17:09:36 +0800 Subject: [PATCH 3/5] improve the util:snake_case --- compiler/bitproto/utils.py | 62 ++++++++++++++++++++++++-------- tests/test_compiler/test_util.py | 25 +++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/compiler/bitproto/utils.py b/compiler/bitproto/utils.py index cf2c38a..2befb53 100644 --- a/compiler/bitproto/utils.py +++ b/compiler/bitproto/utils.py @@ -361,22 +361,56 @@ def pascal_case(word: str) -> str: return "".join(items) -# Uppercase preceded by a lowercase marks the start of a new camelCase word -_snake_case_regex_camel_match = re.compile(r"(?<=[a-z])([A-Z]+[a-z0-9]*)") +_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)") # Xy boundary +_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])") # aA/0A boundary +_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])") +_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])") +_snakecase_re_multi_us = re.compile(r"__+") +_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$") +_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]") +_snakecase_re_leading_us = re.compile(r"^_+") +_snakecase_re_trailing_us = re.compile(r"_+$") -def snake_case(word: str) -> str: - """Converts given word to snake case. - >>> snake_case("someWord") - "some_word" +def snake_case(word: str) -> str: """ - snake_case_split: List[str] = word.split("_") - - camel_case_split: List[str] = list( - itertools.chain.from_iterable( - filter(None, _snake_case_regex_camel_match.split(w)) - for w in snake_case_split - ) + Convert identifier to snake_case with common-sense rules: + - Preserve leading/trailing underscores exactly. + - Normalize interior underscores. + - Default: split at camel boundaries and letter<->digit boundaries. + - If original has both '_' and mixed case, do NOT split letter<->digit. + - Do NOT split letter<->digit inside ALL-UPPER tokens. 
+ """ + if not word: + return "" + + # Preserve edge underscores (e.g., '__init__') + s = word.replace("-", "_") + pre_m = _snakecase_re_leading_us.match(s) + pre = pre_m.group(0) if pre_m else "" + rest = s[len(pre) :] # use the remainder to find suffix + suf_m = _snakecase_re_trailing_us.search(rest) + suf = suf_m.group(0) if suf_m else "" + core = rest[: len(rest) - len(suf)] # core = s - pre - suf + + respect_author_digits = ("_" in word) and bool( + _snakecase_re_mixed_case.search(word) ) - return "_".join(camel_case_split).lower() + + parts: List[str] = [] + for t in core.split("_"): + if not t: + continue + # camel splits (two-pass) + t = _snakecase_re_camel_b1.sub(r"\1_\2", t) + t = _snakecase_re_camel_b2.sub(r"\1_\2", t) + # letter<->digit split when allowed + if not respect_author_digits and not _snakecase_re_upper_or_digits.fullmatch(t): + t = _snakecase_re_alpha_to_digit.sub(r"\1_\2", t) + t = _snakecase_re_digit_to_alpha.sub(r"\1_\2", t) + parts.append(t) + + core_snake = "_".join(parts) + core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower() + return f"{pre}{core_snake}{suf}" diff --git a/tests/test_compiler/test_util.py b/tests/test_compiler/test_util.py index 2612435..6e1ef74 100644 --- a/tests/test_compiler/test_util.py +++ b/tests/test_compiler/test_util.py @@ -129,11 +129,36 @@ def test_pascal_case() -> None: def test_snake_case() -> None: + assert snake_case("") == "" + assert snake_case("123") == "123" + assert snake_case("A") == "a" assert snake_case("snake_case") == "snake_case" assert snake_case("SnakeCase") == "snake_case" assert snake_case("snakeCase") == "snake_case" assert snake_case("SNAKE_CASE") == "snake_case" assert snake_case("SNAKE_42_CASE") == "snake_42_case" + assert snake_case("HTTPServer") == "http_server" + assert snake_case("getHTTPResponseCode") == "get_http_response_code" + assert snake_case("Mixed_SnakeCase") == "mixed_snake_case" + assert snake_case("Snake42Case") == "snake_42_case" + assert 
snake_case("xY") == "x_y" + assert snake_case("Xy") == "xy" + assert snake_case("Id") == "id" + assert snake_case("__Init__") == "__init__" + assert snake_case("__") == "__" + assert snake_case("foo__bar") == "foo_bar" + assert snake_case("already_snake_case") == "already_snake_case" + assert snake_case("kebab-case-here") == "kebab_case_here" + assert snake_case("Ipv6Address") == "ipv_6_address" + assert snake_case("Ipv6_Address") == "ipv6_address" + assert snake_case("MyMessage_v1") == "my_message_v1" + assert snake_case("camelCase123") == "camel_case_123" + assert snake_case("_privateVariable") == "_private_variable" + assert snake_case("GPU3DModel") == "gpu_3_d_model" + assert snake_case("TI82") == "ti82" + assert snake_case("TI82_PLUS") == "ti82_plus" + assert snake_case("MyMessage_mk2") == "my_message_mk2" + assert snake_case("MY_VALUE1") == "my_value1" def test_cast_or_raise() -> None: From d7cc98f9d90fc2afa901da3ba04c8f44d6dadb5e Mon Sep 17 00:00:00 2001 From: hit9 Date: Sat, 20 Sep 2025 17:14:32 +0800 Subject: [PATCH 4/5] remove useless import: itertools --- compiler/bitproto/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/bitproto/utils.py b/compiler/bitproto/utils.py index 2befb53..0d1217e 100644 --- a/compiler/bitproto/utils.py +++ b/compiler/bitproto/utils.py @@ -1,4 +1,3 @@ -import itertools import os import re import sys From 7921cdcb5a1eb612a68e6fe0f1cd953399e45a8a Mon Sep 17 00:00:00 2001 From: hit9 Date: Sat, 20 Sep 2025 17:16:17 +0800 Subject: [PATCH 5/5] bump version 1.2.2 --- changes.rst | 9 +++++++++ compiler/bitproto/__init__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/changes.rst b/changes.rst index 323d631..8fa6258 100644 --- a/changes.rst +++ b/changes.rst @@ -1,5 +1,14 @@ .. currentmodule:: bitproto +Version 1.2.2 +------------- + +.. _version-1.2.2: + +Warning: May break some existing projects' generated names: + +- Improve `snake_case` function. 
#74, #75 + Version 1.2.1 ------------- diff --git a/compiler/bitproto/__init__.py b/compiler/bitproto/__init__.py index a2714de..663ffd0 100644 --- a/compiler/bitproto/__init__.py +++ b/compiler/bitproto/__init__.py @@ -8,5 +8,5 @@ """ -__version__ = "1.2.1" +__version__ = "1.2.2" __description__ = "bit level data interchange format."