diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index d90bb8a55..7c98f1942 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -624,9 +624,16 @@ def contains_case_insensitive(self, other_value): comparison_data, self.value[target] ) elif self.value.is_series(comparison_data): - results = self._series_is_in( - self.convert_string_data_to_lower(self.value[target]), - self.convert_string_data_to_lower(comparison_data), + # column vs column case: perform element-wise case-insensitive substring check + target_series = self.convert_string_data_to_lower(self.value[target]) + comparison_series = self.convert_string_data_to_lower(comparison_data) + results = target_series.combine( + comparison_series, + lambda t, c: ( + vectorized_case_insensitive_is_in(c, [t])[0] + if pd.notna(t) and pd.notna(c) + else False + ), ) else: results = vectorized_case_insensitive_is_in( diff --git a/tests/unit/test_check_operators/test_containment_checks.py b/tests/unit/test_check_operators/test_containment_checks.py index cb997005b..eff4f419d 100644 --- a/tests/unit/test_check_operators/test_containment_checks.py +++ b/tests/unit/test_check_operators/test_containment_checks.py @@ -34,14 +34,20 @@ def test_contains(data, comparator, dataset_type, expected_result): "data,comparator,dataset_type,expected_result", [ ( - {"target": ["Ctt", "Btt", "A"], "VAR2": ["a", "btt", "lll"]}, + {"target": ["A", "Btt", "Ctt"], "VAR2": ["a", "btt", "lll"]}, "VAR2", DaskDataset, [True, True, False], ), ( - {"target": [["A", "B", "C"], ["A", "B", "L"], ["L", "Q", "R"]]}, - "l", + { + "target": [ + ["A", "B", "C"], + ["A", "left hind limb", "L"], + ["L", "NON-ULCERATED left hind limb", "R"], + ] + }, + "LEFT HIND LIMB", PandasDataset, [False, True, True], ), @@ -86,7 +92,7 @@ def test_does_not_contain(data, comparator, dataset_type, expected_result): "data,comparator,dataset_type,expected_result", [ ( - {"target": ["Ctt", "Btt", "A"], "VAR2": ["a", "btt", "lll"]}, + {"target": ["A", "Btt", "Ctt"], "VAR2": ["a", "btt", "lll"]}, "VAR2", DaskDataset, [False, False, True], @@ -600,3 +606,39 @@ def test_is_column_of_iterables(column_data, expected): dataframe_operator = DataframeType({"value": df}) result = dataframe_operator.is_column_of_iterables(df["col"]) assert result == expected + + +@pytest.mark.parametrize( + "data,target_col,comparator_col,dataset_type,expected_result", + [ + ( + { + "PMSTRESC": [ + "2.0", + "2.0", + "NON-ULCERATED left hind limb", + "LEFT LEG", + ], + "PMLOC": [ + "LEFT HIND LIMB ", + "LEFT HIND LIMB ", + "LEFT HIND LIMB", + "left leg", + ], + }, + "PMSTRESC", + "PMLOC", + PandasDataset, + [False, False, True, True], + ), + ], +) +def test_contains_case_insensitive_column_vs_column( + data, target_col, comparator_col, dataset_type, expected_result +): + df = dataset_type.from_dict(data) + dataframe_operator = DataframeType({"value": df}) + result = dataframe_operator.contains_case_insensitive( + {"target": target_col, "comparator": comparator_col} + ) + assert result.equals(df.convert_to_series(expected_result))