diff --git a/changes.rst b/changes.rst
index 323d631..8fa6258 100644
--- a/changes.rst
+++ b/changes.rst
@@ -1,5 +1,14 @@
 .. currentmodule:: bitproto
 
+Version 1.2.2
+-------------
+
+.. _version-1.2.2:
+
+Warning: May break some existing projects' generated names:
+
+- Improve `snake_case` function. #74, #75
+
 Version 1.2.1
 -------------
 
diff --git a/compiler/bitproto/__init__.py b/compiler/bitproto/__init__.py
index a2714de..663ffd0 100644
--- a/compiler/bitproto/__init__.py
+++ b/compiler/bitproto/__init__.py
@@ -8,5 +8,5 @@
 
 """
 
-__version__ = "1.2.1"
+__version__ = "1.2.2"
 __description__ = "bit level data interchange format."
diff --git a/compiler/bitproto/utils.py b/compiler/bitproto/utils.py
index 8dfd5df..0d1217e 100644
--- a/compiler/bitproto/utils.py
+++ b/compiler/bitproto/utils.py
@@ -360,39 +360,75 @@ def pascal_case(word: str) -> str:
     return "".join(items)
 
 
-_snake_case_regex_head = r"[A-Z0-9]"
-_snake_case_regex_tail = r"[^A-Z0-9]"
-_snake_case_regex_capital_match = re.compile(
-    rf"({_snake_case_regex_head}+{_snake_case_regex_tail}*)"
-)
-_snake_case_regex_m_capital_match = re.compile(
-    rf"^({_snake_case_regex_head}{{1,}})({_snake_case_regex_head}+{_snake_case_regex_tail}+)$"
-)
+_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)")  # Xy boundary
+_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])")  # aA/0A boundary
+_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
+_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")
+_snakecase_re_multi_us = re.compile(r"__+")
+_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
+_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
+_snakecase_re_leading_us = re.compile(r"^_+")
+_snakecase_re_trailing_us = re.compile(r"_+$")
 
 
 def snake_case(word: str) -> str:
-    """Converts given word to snake case.
-
-    >>> snake_case("someWord")
-    "some_word"
     """
-    underscore = "_"
-    no_underscore_words = word.split(underscore)
-    no_underscore_cases: List[str] = []
-
-    for w in no_underscore_words:
-        cases = filter(None, _snake_case_regex_capital_match.split(w))
-        for case in cases:
-            subcases = filter(None, _snake_case_regex_m_capital_match.split(case))
-            if subcases:
-                for subcase in subcases:
-                    no_underscore_cases.append(subcase)
-            else:
-                no_underscore_cases.append(case)
-
-    snake_word = ""
-    for case in no_underscore_cases:
-        if not case.isdigit():
-            snake_word += underscore
-        snake_word += case
-    return snake_word.strip(underscore).lower()
+    Convert an identifier to snake_case.
+
+    Rules:
+
+    - Leading/trailing underscores are preserved exactly.
+    - Hyphens are treated as underscores; interior runs of separators
+      collapse into a single underscore.
+    - Default: split at camel boundaries and letter<->digit boundaries.
+    - If the part between the edge underscores has both a separator and
+      mixed case, the author's digit placement is respected: no
+      letter<->digit split.
+    - ALL-UPPER tokens (e.g. ``TI82``, ``GPU3D``) are kept whole.
+
+    >>> snake_case("getHTTPResponseCode")
+    'get_http_response_code'
+    >>> snake_case("Ipv6-Address")
+    'ipv6_address'
+    >>> snake_case("__Init__")
+    '__init__'
+    """
+    if not word:
+        return ""
+
+    # Hyphens are separators too; normalize them before any analysis.
+    s = word.replace("-", "_")
+
+    # Peel off edge underscores (e.g. '__init__') so they survive untouched.
+    pre_m = _snakecase_re_leading_us.match(s)
+    pre = pre_m.group(0) if pre_m else ""
+    rest = s[len(pre) :]  # use the remainder to find suffix
+    suf_m = _snakecase_re_trailing_us.search(rest)
+    suf = suf_m.group(0) if suf_m else ""
+    core = rest[: len(rest) - len(suf)]  # core = s - pre - suf
+
+    # Judge the author's intent on the normalized core only: edge
+    # underscores are decoration and hyphens count as separators.
+    respect_author_digits = ("_" in core) and bool(
+        _snakecase_re_mixed_case.search(core)
+    )
+
+    parts: List[str] = []
+    for t in core.split("_"):
+        if not t:
+            continue
+        # ALL-UPPER tokens stay whole. Test the raw token: once the
+        # digit->upper camel rule has split it, it no longer looks ALL-UPPER.
+        if not _snakecase_re_upper_or_digits.fullmatch(t):
+            # camel splits (two-pass)
+            t = _snakecase_re_camel_b1.sub(r"\1_\2", t)
+            t = _snakecase_re_camel_b2.sub(r"\1_\2", t)
+            # letter<->digit split when allowed
+            if not respect_author_digits:
+                t = _snakecase_re_alpha_to_digit.sub(r"\1_\2", t)
+                t = _snakecase_re_digit_to_alpha.sub(r"\1_\2", t)
+        parts.append(t)
+
+    core_snake = "_".join(parts)
+    core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower()
+    return f"{pre}{core_snake}{suf}"
diff --git a/tests/test_compiler/test_util.py b/tests/test_compiler/test_util.py
index a9c89be..6e1ef74 100644
--- a/tests/test_compiler/test_util.py
+++ b/tests/test_compiler/test_util.py
@@ -129,10 +129,39 @@ def test_pascal_case() -> None:
 
 
 def test_snake_case() -> None:
+    assert snake_case("") == ""
+    assert snake_case("123") == "123"
+    assert snake_case("A") == "a"
     assert snake_case("snake_case") == "snake_case"
     assert snake_case("SnakeCase") == "snake_case"
     assert snake_case("snakeCase") == "snake_case"
     assert snake_case("SNAKE_CASE") == "snake_case"
+    assert snake_case("SNAKE_42_CASE") == "snake_42_case"
+    assert snake_case("HTTPServer") == "http_server"
+    assert snake_case("getHTTPResponseCode") == "get_http_response_code"
+    assert snake_case("Mixed_SnakeCase") == "mixed_snake_case"
+    assert snake_case("Snake42Case") == "snake_42_case"
+    assert snake_case("xY") == "x_y"
+    assert snake_case("Xy") == "xy"
+    assert snake_case("Id") == "id"
+    assert snake_case("__Init__") == "__init__"
+    assert snake_case("__") == "__"
+    assert snake_case("foo__bar") == "foo_bar"
+    assert snake_case("already_snake_case") == "already_snake_case"
+    assert snake_case("kebab-case-here") == "kebab_case_here"
+    assert snake_case("Ipv6Address") == "ipv_6_address"
+    assert snake_case("Ipv6_Address") == "ipv6_address"
+    assert snake_case("MyMessage_v1") == "my_message_v1"
+    assert snake_case("camelCase123") == "camel_case_123"
+    assert snake_case("_privateVariable") == "_private_variable"
+    assert snake_case("GPU3DModel") == "gpu_3_d_model"
+    assert snake_case("TI82") == "ti82"
+    assert snake_case("TI82_PLUS") == "ti82_plus"
+    assert snake_case("MyMessage_mk2") == "my_message_mk2"
+    assert snake_case("MY_VALUE1") == "my_value1"
+    assert snake_case("Ipv6-Address") == "ipv6_address"
+    assert snake_case("_Snake42Case") == "_snake_42_case"
+    assert snake_case("GPU3D") == "gpu3d"
 
 
 def test_cast_or_raise() -> None: