From 01db764fcff581128b117011f343166e14927a33 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Thu, 20 Nov 2025 18:19:56 -0500 Subject: [PATCH 1/3] Fix is_column_of_iterables to check all values instead of just first row --- .../check_operators/dataframe_operators.py | 7 +- resources/schema/Operator.md | 6 ++ .../test_containment_checks.py | 94 +++++++++++++++++++ 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 09ff499fb..272d27fdd 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -153,8 +153,11 @@ def get_comparator_data(self, comparator, value_is_literal: bool = False): @log_operator_execution def is_column_of_iterables(self, column): - return self.value.is_series(column) and ( - isinstance(column.iloc[0], list) or isinstance(column.iloc[0], set) + return self.value.is_series(column) and all( + val is not None + and not (isinstance(val, float) and pd.isna(val)) + and isinstance(val, (list, set)) + for val in column ) @log_operator_execution diff --git a/resources/schema/Operator.md b/resources/schema/Operator.md index 16e0f5a89..f81318f10 100644 --- a/resources/schema/Operator.md +++ b/resources/schema/Operator.md @@ -299,6 +299,8 @@ Complement of `suffix_equal_to` Will return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data. +The operator determines if a column contains iterables by checking that **all** values in the column are iterables (lists or sets), not `None`, and not `NaN`. If all values are iterables, the operator performs row-by-row comparison. If the column has mixed types (e.g., `None` and lists), the column is not treated as iterables and the operator uses value vs all column values logic. + Example: ```yaml @@ -311,6 +313,8 @@ Example: Complement of `contains`. Returns True when the value is NOT contained within the target collection. +The operator uses the same iterable detection logic as `contains`: all values must be iterables for the column to be treated as iterables. + ```yaml - name: "--TOXGR" operator: "does_not_contain" @@ -638,6 +642,8 @@ Testing whether individual values or string parts belong to specific lists or se Value in `name` compared against a list in `value`. The list can have literal values or be a reference to a `$variable`. +When the `value` parameter references a column variable (e.g., `$variable`), the operator determines if that column contains iterables by checking that **all** values in the column are iterables (lists or sets), not `None`, and not `NaN`. If all values are iterables, the operator performs row-by-row comparison. If the column has mixed types, the operator uses value vs all column values logic. + > ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment') ```yaml diff --git a/tests/unit/test_check_operators/test_containment_checks.py b/tests/unit/test_check_operators/test_containment_checks.py index 4f32770d7..de17c706f 100644 --- a/tests/unit/test_check_operators/test_containment_checks.py +++ b/tests/unit/test_check_operators/test_containment_checks.py @@ -417,3 +417,97 @@ def test_is_not_contained_by_case_insensitive( {"target": "target", "comparator": comparator} ) assert result.equals(df.convert_to_series(expected_result)) + + +@pytest.mark.parametrize( + "data,comparator,dataset_type,expected_result", + [ + ( + {"target": [None, ["A", "B"], ["C", "D"]]}, + "A", + PandasDataset, + [False, True, False], + ), + ( + {"target": [None, ["A", "B"], ["C", "D"]]}, + "C", + PandasDataset, + [False, False, True], + ), + ( + {"target": [["A", "B"], None, ["C", "D"]]}, + "A", + PandasDataset, + [True, False, False], + ), + ( + {"target": [["A", "B"], ["C", "D"], None]}, + "A", + PandasDataset, + [True, False, False], + ), + ], +) +def test_contains_with_none_first_row(data, comparator, dataset_type, expected_result): + df = dataset_type.from_dict(data) + dataframe_operator = DataframeType({"value": df}) + result = dataframe_operator.contains( + {"target": "target", "comparator": comparator, "value_is_literal": True} + ) + assert result.equals(df.convert_to_series(expected_result)) + + +@pytest.mark.parametrize( + "data,comparator,dataset_type,expected_result", + [ + ( + {"target": [None, ["A", "B"], ["C", "D"]]}, + "A", + PandasDataset, + [True, False, True], + ), + ( + {"target": [None, ["A", "B"], ["C", "D"]]}, + "C", + PandasDataset, + [True, True, False], + ), + ], +) +def test_does_not_contain_with_none_first_row( + data, comparator, dataset_type, expected_result +): + df = dataset_type.from_dict(data) + dataframe_operator = DataframeType({"value": df}) + result = dataframe_operator.does_not_contain( + {"target": "target", "comparator": comparator, "value_is_literal": True} + ) + assert result.equals(df.convert_to_series(expected_result)) + + +@pytest.mark.parametrize( + "data,comparator,dataset_type,expected_result", + [ + ( + {"target": ["A", "B", "C"], "comparison": [None, ["A", "B"], ["C", "D"]]}, + "comparison", + PandasDataset, + [False, True, True], + ), + ( + {"target": ["A", "B", "C"], "comparison": [["A", "B"], None, ["C", "D"]]}, + "comparison", + PandasDataset, + [True, False, True], + ), + ], +) +def test_is_contained_by_with_none_in_comparison( + data, comparator, dataset_type, expected_result +): + df = dataset_type.from_dict(data) + dataframe_operator = DataframeType({"value": df}) + result = dataframe_operator.is_contained_by( + {"target": "target", "comparator": comparator} + ) + assert result.equals(df.convert_to_series(expected_result)) From 755ea5ee021d5353460c01b6aa4b3c2f54fb06ab Mon Sep 17 00:00:00 2001 From: Rakesh Date: Thu, 20 Nov 2025 18:31:09 -0500 Subject: [PATCH 2/3] Unit test update --- tests/unit/test_check_operators/test_containment_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_check_operators/test_containment_checks.py b/tests/unit/test_check_operators/test_containment_checks.py index de17c706f..23529b3f7 100644 --- a/tests/unit/test_check_operators/test_containment_checks.py +++ b/tests/unit/test_check_operators/test_containment_checks.py @@ -492,13 +492,13 @@ def test_does_not_contain_with_none_first_row( {"target": ["A", "B", "C"], "comparison": [None, ["A", "B"], ["C", "D"]]}, "comparison", PandasDataset, - [False, True, True], + [False, False, False], ), ( {"target": ["A", "B", "C"], "comparison": [["A", "B"], None, ["C", "D"]]}, "comparison", PandasDataset, - [True, False, True], + [False, False, False], ), ], ) From 524ec6a76b6c00a8c38a0dcd6a62a790c3a47b94 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Sun, 23 Nov 2025 11:55:29 -0500 Subject: [PATCH 3/3] Documentation Update --- resources/schema/Operator.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/resources/schema/Operator.md b/resources/schema/Operator.md index f81318f10..720ac1a05 100644 --- a/resources/schema/Operator.md +++ b/resources/schema/Operator.md @@ -299,7 +299,7 @@ Complement of `suffix_equal_to` Will return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data. -The operator determines if a column contains iterables by checking that **all** values in the column are iterables (lists or sets), not `None`, and not `NaN`. If all values are iterables, the operator performs row-by-row comparison. If the column has mixed types (e.g., `None` and lists), the column is not treated as iterables and the operator uses value vs all column values logic. +The operator checks if every value in a column is a list or set. If yes, it compares row-by-row. If any value is blank or a different type (like a string or number), it compares each value against the entire column instead. Example: @@ -313,8 +313,6 @@ Example: Complement of `contains`. Returns True when the value is NOT contained within the target collection. -The operator uses the same iterable detection logic as `contains`: all values must be iterables for the column to be treated as iterables. - ```yaml - name: "--TOXGR" operator: "does_not_contain" @@ -642,7 +640,7 @@ Testing whether individual values or string parts belong to specific lists or se Value in `name` compared against a list in `value`. The list can have literal values or be a reference to a `$variable`. -When the `value` parameter references a column variable (e.g., `$variable`), the operator determines if that column contains iterables by checking that **all** values in the column are iterables (lists or sets), not `None`, and not `NaN`. If all values are iterables, the operator performs row-by-row comparison. If the column has mixed types, the operator uses value vs all column values logic. +This operator behaves similarly to `contains`. The key distinction: `contains` checks if comparator ∈ target, while `is_contained_by` checks if target ∈ comparator. > ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')