From bd475b45222d72a04b252d5d85a9ed11dc98e6ca Mon Sep 17 00:00:00 2001 From: Dan Halperin Date: Thu, 5 Feb 2026 12:04:30 -0800 Subject: [PATCH] Fix trailing newline loss when scrubbing sensitive lines Regression from #225: the _split_line_preserve_whitespace function incorrectly detected leading/trailing whitespace. When re.split is called with a capturing group on a string with leading/trailing whitespace, it produces empty strings at the boundaries: re.split(r"(\s+)", " foo\n") -> ['', ' ', 'foo', '\n', ''] The code was checking parts[0] and parts[-1] for whitespace, but those are empty strings. The actual whitespace is at parts[1] and parts[-2]. Fix: drop empty strings at boundaries before processing, simplifying the leading/trailing whitespace detection. ---- Prompt: ``` Hey I ran netconan on ~/networks/2026-02-04-.../ and got this diff. Did we mess up the newlines somewhere? We're now commenting out the closing brace. ``` commit-id:c8455062 --- netconan/sensitive_item_removal.py | 12 +++++++++--- tests/unit/test_sensitive_item_removal.py | 11 +++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/netconan/sensitive_item_removal.py b/netconan/sensitive_item_removal.py index b8ebbc6..7fac3be 100644 --- a/netconan/sensitive_item_removal.py +++ b/netconan/sensitive_item_removal.py @@ -439,14 +439,20 @@ def _split_line_preserve_whitespace(line): - parts: list of alternating whitespace and word tokens - word_indices: indices of the word (non-whitespace) elements in parts """ - # Split on whitespace, keeping whitespace as separate list elements + # Split on whitespace, keeping whitespace as separate list elements. + # re.split produces empty strings at boundaries when the string starts/ends + # with a delimiter, so drop those to simplify subsequent logic. parts = re.split(r"(\s+)", line) + if parts and parts[0] == "": + parts = parts[1:] + if parts and parts[-1] == "": + parts = parts[:-1] # Identify which indices contain words (non-whitespace, non-empty) word_indices = [i for i, part in enumerate(parts) if part and not part.isspace()] # Derive leading/trailing whitespace from parts words = [parts[i] for i in word_indices] - leading = parts[0] if parts and parts[0] and parts[0].isspace() else "" - trailing = parts[-1] if parts and parts[-1] and parts[-1].isspace() else "" + leading = parts[0] if parts and parts[0].isspace() else "" + trailing = parts[-1] if parts and parts[-1].isspace() else "" return leading, words, trailing, parts, word_indices diff --git a/tests/unit/test_sensitive_item_removal.py b/tests/unit/test_sensitive_item_removal.py index 79e0bab..03b7376 100644 --- a/tests/unit/test_sensitive_item_removal.py +++ b/tests/unit/test_sensitive_item_removal.py @@ -679,6 +679,17 @@ def test_pwd_removal_preserve_trailing_whitespace(regexes, whitespace): assert processed_line.endswith(whitespace) +@pytest.mark.parametrize("whitespace", [" ", "\t", "\n", " \t\n"]) +def test_line_scrub_preserve_trailing_whitespace(regexes, whitespace): + """Test trailing whitespace is preserved when line is scrubbed (encrypted-password case).""" + # encrypted-password triggers line scrubbing (sensitive_item_num=None) + config_line = " encrypted-password SECRET{}".format(whitespace) + pwd_lookup = {} + processed_line = replace_matching_item(regexes, config_line, pwd_lookup, SALT) + assert _LINE_SCRUBBED_MESSAGE in processed_line + assert processed_line.endswith(whitespace) + + @pytest.mark.parametrize("config_line,sensitive_text", sensitive_lines) @pytest.mark.parametrize( "prepend_text",