Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions changes.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
.. currentmodule:: bitproto

Version 1.2.2
-------------

.. _version-1.2.2:

Warning: This release may change the generated names in some existing projects:

- Improve `snake_case` function. #74, #75

Version 1.2.1
-------------

Expand Down
2 changes: 1 addition & 1 deletion compiler/bitproto/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@

"""

# Package metadata. Only the current release number is kept; an earlier
# assignment of "1.2.1" was immediately overwritten and therefore dead.
__version__ = "1.2.2"
__description__ = "bit level data interchange format."
81 changes: 49 additions & 32 deletions compiler/bitproto/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,39 +360,56 @@ def pascal_case(word: str) -> str:
return "".join(items)


# Camel-case boundaries, applied in two passes.
_snakecase_re_camel_b1 = re.compile(r"(.)([A-Z][a-z]+)")  # Xy boundary
_snakecase_re_camel_b2 = re.compile(r"([a-z0-9])([A-Z])")  # aA/0A boundary
# Letter<->digit boundaries.
_snakecase_re_alpha_to_digit = re.compile(r"([A-Za-z])([0-9])")
_snakecase_re_digit_to_alpha = re.compile(r"([0-9])([A-Za-z])")

_snakecase_re_multi_us = re.compile(r"__+")
_snakecase_re_upper_or_digits = re.compile(r"^[A-Z0-9]+$")
_snakecase_re_mixed_case = re.compile(r"[A-Z].*[a-z]|[a-z].*[A-Z]")
_snakecase_re_leading_us = re.compile(r"^_+")
_snakecase_re_trailing_us = re.compile(r"_+$")


def snake_case(word: str) -> str:
    """Convert an identifier to snake_case.

    Rules:

    - Hyphens are treated as underscores.
    - Leading and trailing underscores are preserved exactly.
    - Interior runs of underscores are collapsed to a single one.
    - Each underscore-separated token is split at camel-case boundaries.
    - Tokens are also split at letter<->digit boundaries, except when:
        * the original word contains both an underscore and mixed case
          (the author's own digit placement is respected), or
        * the token, after camel splitting, consists only of uppercase
          letters and digits.

    >>> snake_case("someWord")
    'some_word'
    >>> snake_case("HTTPServer")
    'http_server'
    >>> snake_case("Ipv6Address")
    'ipv_6_address'
    >>> snake_case("Ipv6_Address")
    'ipv6_address'
    >>> snake_case("TI82")
    'ti82'
    >>> snake_case("__Init__")
    '__init__'

    :param word: The identifier to convert.
    :return: The snake_case form of ``word``; ``""`` for empty input.
    """
    if not word:
        return ""

    # Peel off edge underscores so they survive untouched (e.g. '__init__').
    s = word.replace("-", "_")
    pre_m = _snakecase_re_leading_us.match(s)
    pre = pre_m.group(0) if pre_m else ""
    # Search the suffix in the remainder only, so an all-underscore word
    # is not counted as both prefix and suffix.
    rest = s[len(pre) :]
    suf_m = _snakecase_re_trailing_us.search(rest)
    suf = suf_m.group(0) if suf_m else ""
    core = rest[: len(rest) - len(suf)]

    # Evaluated on the original `word`: hyphens do not count as underscores
    # here, while leading/trailing underscores do.
    respect_author_digits = ("_" in word) and bool(
        _snakecase_re_mixed_case.search(word)
    )

    parts: List[str] = []
    for t in core.split("_"):
        if not t:
            # Empty token produced by consecutive underscores.
            continue
        # Camel splits (two-pass).
        t = _snakecase_re_camel_b1.sub(r"\1_\2", t)
        t = _snakecase_re_camel_b2.sub(r"\1_\2", t)
        # Letter<->digit split when allowed. The upper-or-digits check sees
        # the token *after* camel splitting, underscores included.
        if not respect_author_digits and not _snakecase_re_upper_or_digits.fullmatch(
            t
        ):
            t = _snakecase_re_alpha_to_digit.sub(r"\1_\2", t)
            t = _snakecase_re_digit_to_alpha.sub(r"\1_\2", t)
        parts.append(t)

    core_snake = "_".join(parts)
    core_snake = _snakecase_re_multi_us.sub("_", core_snake).strip("_").lower()
    return f"{pre}{core_snake}{suf}"
26 changes: 26 additions & 0 deletions tests/test_compiler/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,36 @@ def test_pascal_case() -> None:


def test_snake_case() -> None:
    """Check ``snake_case`` against a table of ``(given, expected)`` pairs."""
    expectations = (
        ("", ""),
        ("123", "123"),
        ("A", "a"),
        ("snake_case", "snake_case"),
        ("SnakeCase", "snake_case"),
        ("snakeCase", "snake_case"),
        ("SNAKE_CASE", "snake_case"),
        ("SNAKE_42_CASE", "snake_42_case"),
        ("HTTPServer", "http_server"),
        ("getHTTPResponseCode", "get_http_response_code"),
        ("Mixed_SnakeCase", "mixed_snake_case"),
        ("Snake42Case", "snake_42_case"),
        ("xY", "x_y"),
        ("Xy", "xy"),
        ("Id", "id"),
        ("__Init__", "__init__"),
        ("__", "__"),
        ("foo__bar", "foo_bar"),
        ("already_snake_case", "already_snake_case"),
        ("kebab-case-here", "kebab_case_here"),
        ("Ipv6Address", "ipv_6_address"),
        ("Ipv6_Address", "ipv6_address"),
        ("MyMessage_v1", "my_message_v1"),
        ("camelCase123", "camel_case_123"),
        ("_privateVariable", "_private_variable"),
        ("GPU3DModel", "gpu_3_d_model"),
        ("TI82", "ti82"),
        ("TI82_PLUS", "ti82_plus"),
        ("MyMessage_mk2", "my_message_mk2"),
        ("MY_VALUE1", "my_value1"),
    )
    # Cases run in table order; the first mismatch fails the test.
    for given, expected in expectations:
        assert snake_case(given) == expected


def test_cast_or_raise() -> None:
Expand Down