diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py
index 09ff499fb..272d27fdd 100644
--- a/cdisc_rules_engine/check_operators/dataframe_operators.py
+++ b/cdisc_rules_engine/check_operators/dataframe_operators.py
@@ -153,8 +153,11 @@ def get_comparator_data(self, comparator, value_is_literal: bool = False):
 
     @log_operator_execution
     def is_column_of_iterables(self, column):
-        return self.value.is_series(column) and (
-            isinstance(column.iloc[0], list) or isinstance(column.iloc[0], set)
+        return self.value.is_series(column) and all(
+            val is not None
+            and not (isinstance(val, float) and pd.isna(val))
+            and isinstance(val, (list, set))
+            for val in column
         )
 
     @log_operator_execution
diff --git a/resources/schema/Operator.md b/resources/schema/Operator.md
index b84d0e668..4b0b650b4 100644
--- a/resources/schema/Operator.md
+++ b/resources/schema/Operator.md
@@ -299,6 +299,8 @@ Complement of `suffix_equal_to`
 
 Will return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data.
 
+The operator checks if every value in a column is a list or set. If yes, it compares row-by-row. If any value is blank or a different type (like a string or number), it compares each value against the entire column instead.
+
 Example:
 
 ```yaml
@@ -649,6 +651,8 @@ Testing whether individual values or string parts belong to specific lists or se
 
 Value in `name` compared against a list in `value`. The list can have literal values or be a reference to a `$variable`.
 
+This operator behaves similarly to `contains`. The key distinction: `contains` checks if comparator ∈ target, while `is_contained_by` checks if target ∈ comparator.
+
 > ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')
 
 ```yaml
diff --git a/tests/unit/test_check_operators/test_containment_checks.py b/tests/unit/test_check_operators/test_containment_checks.py
index 4f32770d7..23529b3f7 100644
--- a/tests/unit/test_check_operators/test_containment_checks.py
+++ b/tests/unit/test_check_operators/test_containment_checks.py
@@ -417,3 +417,97 @@ def test_is_not_contained_by_case_insensitive(
         {"target": "target", "comparator": comparator}
     )
     assert result.equals(df.convert_to_series(expected_result))
+
+
+@pytest.mark.parametrize(
+    "data,comparator,dataset_type,expected_result",
+    [
+        (
+            {"target": [None, ["A", "B"], ["C", "D"]]},
+            "A",
+            PandasDataset,
+            [False, True, False],
+        ),
+        (
+            {"target": [None, ["A", "B"], ["C", "D"]]},
+            "C",
+            PandasDataset,
+            [False, False, True],
+        ),
+        (
+            {"target": [["A", "B"], None, ["C", "D"]]},
+            "A",
+            PandasDataset,
+            [True, False, False],
+        ),
+        (
+            {"target": [["A", "B"], ["C", "D"], None]},
+            "A",
+            PandasDataset,
+            [True, False, False],
+        ),
+    ],
+)
+def test_contains_with_none_first_row(data, comparator, dataset_type, expected_result):
+    df = dataset_type.from_dict(data)
+    dataframe_operator = DataframeType({"value": df})
+    result = dataframe_operator.contains(
+        {"target": "target", "comparator": comparator, "value_is_literal": True}
+    )
+    assert result.equals(df.convert_to_series(expected_result))
+
+
+@pytest.mark.parametrize(
+    "data,comparator,dataset_type,expected_result",
+    [
+        (
+            {"target": [None, ["A", "B"], ["C", "D"]]},
+            "A",
+            PandasDataset,
+            [True, False, True],
+        ),
+        (
+            {"target": [None, ["A", "B"], ["C", "D"]]},
+            "C",
+            PandasDataset,
+            [True, True, False],
+        ),
+    ],
+)
+def test_does_not_contain_with_none_first_row(
+    data, comparator, dataset_type, expected_result
+):
+    df = dataset_type.from_dict(data)
+    dataframe_operator = DataframeType({"value": df})
+    result = dataframe_operator.does_not_contain(
+        {"target": "target", "comparator": comparator, "value_is_literal": True}
+    )
+    assert result.equals(df.convert_to_series(expected_result))
+
+
+@pytest.mark.parametrize(
+    "data,comparator,dataset_type,expected_result",
+    [
+        (
+            {"target": ["A", "B", "C"], "comparison": [None, ["A", "B"], ["C", "D"]]},
+            "comparison",
+            PandasDataset,
+            [False, False, False],
+        ),
+        (
+            {"target": ["A", "B", "C"], "comparison": [["A", "B"], None, ["C", "D"]]},
+            "comparison",
+            PandasDataset,
+            [False, False, False],
+        ),
+    ],
+)
+def test_is_contained_by_with_none_in_comparison(
+    data, comparator, dataset_type, expected_result
+):
+    df = dataset_type.from_dict(data)
+    dataframe_operator = DataframeType({"value": df})
+    result = dataframe_operator.is_contained_by(
+        {"target": "target", "comparator": comparator}
+    )
+    assert result.equals(df.convert_to_series(expected_result))