From bd475b45222d72a04b252d5d85a9ed11dc98e6ca Mon Sep 17 00:00:00 2001
From: Dan Halperin <dhalperi@amazon.com>
Date: Thu, 5 Feb 2026 12:04:30 -0800
Subject: [PATCH] Fix trailing newline loss when scrubbing sensitive lines

Regression from #225: the _split_line_preserve_whitespace function
failed to detect leading/trailing whitespace. When re.split is called
with a capturing group and the string starts or ends with whitespace,
the result starts or ends with an empty string:

    re.split(r"(\s+)", "  foo\n") -> ['', '  ', 'foo', '\n', '']

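The code was checking parts[0] and parts[-1] for whitespace, but for
such lines those are empty strings; the actual whitespace is at
parts[1] and parts[-2]. As a result, leading and trailing were both
computed as "" and the trailing newline was lost when the line was
scrubbed.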

Fix: drop the empty strings at the boundaries before processing, so
that parts[0] and parts[-1] are the real first and last tokens and the
leading/trailing whitespace detection stays simple.

----

Prompt:
```
Hey I ran netconan on ~/networks/2026-02-04-.../ and got this diff. Did we mess
up the newlines somewhere? We're now commenting out the closing brace.
```

commit-id:c8455062
---
 netconan/sensitive_item_removal.py        | 12 +++++++++---
 tests/unit/test_sensitive_item_removal.py | 11 +++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/netconan/sensitive_item_removal.py b/netconan/sensitive_item_removal.py
index b8ebbc6..7fac3be 100644
--- a/netconan/sensitive_item_removal.py
+++ b/netconan/sensitive_item_removal.py
@@ -439,14 +439,20 @@ def _split_line_preserve_whitespace(line):
     - parts: list of alternating whitespace and word tokens
     - word_indices: indices of the word (non-whitespace) elements in parts
     """
-    # Split on whitespace, keeping whitespace as separate list elements
+    # Split on whitespace, keeping whitespace as separate list elements.
+    # re.split produces empty strings at boundaries when the string starts/ends
+    # with a delimiter, so drop those to simplify subsequent logic.
     parts = re.split(r"(\s+)", line)
+    if parts and parts[0] == "":
+        parts = parts[1:]
+    if parts and parts[-1] == "":
+        parts = parts[:-1]
     # Identify which indices contain words (non-whitespace, non-empty)
     word_indices = [i for i, part in enumerate(parts) if part and not part.isspace()]
 
     # Derive leading/trailing whitespace from parts
     words = [parts[i] for i in word_indices]
-    leading = parts[0] if parts and parts[0] and parts[0].isspace() else ""
-    trailing = parts[-1] if parts and parts[-1] and parts[-1].isspace() else ""
+    leading = parts[0] if parts and parts[0].isspace() else ""
+    trailing = parts[-1] if parts and parts[-1].isspace() else ""
 
     return leading, words, trailing, parts, word_indices
diff --git a/tests/unit/test_sensitive_item_removal.py b/tests/unit/test_sensitive_item_removal.py
index 79e0bab..03b7376 100644
--- a/tests/unit/test_sensitive_item_removal.py
+++ b/tests/unit/test_sensitive_item_removal.py
@@ -679,6 +679,17 @@ def test_pwd_removal_preserve_trailing_whitespace(regexes, whitespace):
     assert processed_line.endswith(whitespace)
 
 
+@pytest.mark.parametrize("whitespace", [" ", "\t", "\n", " \t\n"])
+def test_line_scrub_preserve_trailing_whitespace(regexes, whitespace):
+    """Test trailing whitespace is preserved when line is scrubbed (encrypted-password case)."""
+    # encrypted-password triggers line scrubbing (sensitive_item_num=None)
+    config_line = "        encrypted-password SECRET{}".format(whitespace)
+    pwd_lookup = {}
+    processed_line = replace_matching_item(regexes, config_line, pwd_lookup, SALT)
+    assert _LINE_SCRUBBED_MESSAGE in processed_line
+    assert processed_line.endswith(whitespace)
+
+
 @pytest.mark.parametrize("config_line,sensitive_text", sensitive_lines)
 @pytest.mark.parametrize(
     "prepend_text",