From 4a119d6471b49c99f61b5a3ace688d8abfda2891 Mon Sep 17 00:00:00 2001
From: Samuel Johnson
Date: Tue, 20 Jan 2026 14:46:29 -0500
Subject: [PATCH] regex

---
Reviewer notes (commentary only, not part of the commit message):

What this patch does
* Adds `_regex_str_conversion(x)`. For a non-null `x` (per `pd.notna`):
  an `int` becomes `str(x).strip()`; a `float` becomes `f"{x:.0f}"` when
  `x.is_integer()` (e.g. 200.0 -> "200"), otherwise `str(x).strip()`.
  Any other value, and any null value, is returned unchanged.
* Switches the six regex operators touched below (prefix_matches_regex,
  not_prefix_matches_regex, suffix_matches_regex,
  not_suffix_matches_regex, matches_regex, not_matches_regex) from
  `_custom_str_conversion` to the new helper when mapping the target
  column. Each of them still applies `.notna()` and `.astype(str)` to
  the mapped column before matching.
* Adds a one-line docstring to `_custom_str_conversion`.

To confirm before merging
* Presumably the intent is that string cells reach the regex unmodified
  instead of being numerically normalized; the body of
  `_custom_str_conversion` is not visible in these hunks, so verify.
* `bool` is a subclass of `int`, so True/False take the `int` branch and
  become "True"/"False".
* This copy of the patch was re-flowed from a whitespace-collapsed
  paste: indentation and blank context lines were reconstructed and
  should be checked against the target file before applying.

 .../check_operators/dataframe_operators.py | 24 ++++++++++++++-----
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py
index 011835f65..b86e5572d 100644
--- a/cdisc_rules_engine/check_operators/dataframe_operators.py
+++ b/cdisc_rules_engine/check_operators/dataframe_operators.py
@@ -85,7 +85,19 @@ def __init__(self, data):
     def _assert_valid_value_and_cast(self, value):
         return value
 
+    def _regex_str_conversion(self, x):
+        """Convert value to string for regex operations.
+        Only converts non-null values. Returns NaN/None as-is.
+        """
+        if pd.notna(x):
+            if isinstance(x, int):
+                return str(x).strip()
+            elif isinstance(x, float):
+                return f"{x:.0f}" if x.is_integer() else str(x).strip()
+        return x
+
     def _custom_str_conversion(self, x):
+        """used to normalize numeric representations i.e. treat 200.00 as 200 for comparisons"""
         if pd.notna(x):
             if isinstance(x, str):
                 try:
@@ -750,7 +762,7 @@ def prefix_matches_regex(self, other_value):
         comparator = other_value.get("comparator")
         prefix = other_value.get("prefix")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & converted_strings.astype(str).map(
             lambda x: re.search(comparator, x[:prefix]) is not None
@@ -764,7 +776,7 @@ def not_prefix_matches_regex(self, other_value):
         comparator = other_value.get("comparator")
         prefix = other_value.get("prefix")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & ~converted_strings.astype(str).map(
             lambda x: re.search(comparator, x[:prefix]) is not None
@@ -778,7 +790,7 @@ def suffix_matches_regex(self, other_value):
         comparator = other_value.get("comparator")
         suffix = other_value.get("suffix")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & converted_strings.astype(str).map(
             lambda x: re.search(comparator, x[-suffix:]) is not None
@@ -792,7 +804,7 @@ def not_suffix_matches_regex(self, other_value):
         comparator = other_value.get("comparator")
         suffix = other_value.get("suffix")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & ~converted_strings.astype(str).map(
             lambda x: re.search(comparator, x[-suffix:]) is not None
@@ -805,7 +817,7 @@ def matches_regex(self, other_value):
         target = self.replace_prefix(other_value.get("target"))
         comparator = other_value.get("comparator")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & converted_strings.astype(str).str.match(
             comparator
@@ -818,7 +830,7 @@ def not_matches_regex(self, other_value):
         target = self.replace_prefix(other_value.get("target"))
         comparator = other_value.get("comparator")
         converted_strings = self.value[target].map(
-            lambda x: self._custom_str_conversion(x)
+            lambda x: self._regex_str_conversion(x)
         )
         results = converted_strings.notna() & ~converted_strings.astype(str).str.match(
             comparator