Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,11 @@ def get_comparator_data(self, comparator, value_is_literal: bool = False):

@log_operator_execution
def is_column_of_iterables(self, column):
    """Return whether ``column`` is a series made up solely of lists/sets.

    Every element is inspected, not just the first one, so a column that
    contains a blank (``None``/NaN) or a scalar such as a string or number
    anywhere is not treated as a column of iterables.

    :param column: candidate object; it is first checked with
        ``self.value.is_series``.
    :return: the falsy result of ``self.value.is_series(column)`` when the
        object is not a series, otherwise ``True`` only if every element is
        a ``list`` or ``set``. An empty series yields ``True`` because
        ``all()`` of an empty iterable is ``True``.
    """
    # ``isinstance(val, (list, set))`` is already False for None and for
    # float NaN, so no separate null check is required.
    return self.value.is_series(column) and all(
        isinstance(val, (list, set)) for val in column
    )

@log_operator_execution
Expand Down
4 changes: 4 additions & 0 deletions resources/schema/Operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,8 @@ Complement of `suffix_equal_to`

Will return True if the value in `value` is contained within the collection/iterable in the target column, or if there's an exact match for non-iterable data.

The operator first checks whether every value in a column is a list or set. If so, it compares row by row. If any value is blank or of a different type (such as a string or number), it instead compares each value against the entire column.

Example:

```yaml
Expand Down Expand Up @@ -649,6 +651,8 @@ Testing whether individual values or string parts belong to specific lists or se

The value in `name` is compared against a list in `value`. The list can contain literal values or be a reference to a `$variable`.

This operator behaves similarly to `contains`. The key distinction: `contains` checks if comparator ∈ target, while `is_contained_by` checks if target ∈ comparator.

> ACTARM in ('Screen Failure', 'Not Assigned', 'Not Treated', 'Unplanned Treatment')

```yaml
Expand Down
94 changes: 94 additions & 0 deletions tests/unit/test_check_operators/test_containment_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,3 +417,97 @@ def test_is_not_contained_by_case_insensitive(
{"target": "target", "comparator": comparator}
)
assert result.equals(df.convert_to_series(expected_result))

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these tests look good


@pytest.mark.parametrize(
    "data,comparator,dataset_type,expected_result",
    [
        (
            {"target": [None, ["A", "B"], ["C", "D"]]},
            "A",
            PandasDataset,
            [False, True, False],
        ),
        (
            {"target": [None, ["A", "B"], ["C", "D"]]},
            "C",
            PandasDataset,
            [False, False, True],
        ),
        (
            {"target": [["A", "B"], None, ["C", "D"]]},
            "A",
            PandasDataset,
            [True, False, False],
        ),
        (
            {"target": [["A", "B"], ["C", "D"], None]},
            "A",
            PandasDataset,
            [True, False, False],
        ),
    ],
)
def test_contains_with_none_first_row(data, comparator, dataset_type, expected_result):
    """Check ``contains`` on a list column that has a ``None`` entry.

    The cases place the ``None`` in the first, middle and last row. Per the
    expected results, the ``None`` row is False and every list row reports
    whether it holds the literal comparator.
    """
    dataset = dataset_type.from_dict(data)
    operator_params = {
        "target": "target",
        "comparator": comparator,
        "value_is_literal": True,
    }
    actual = DataframeType({"value": dataset}).contains(operator_params)
    expected = dataset.convert_to_series(expected_result)
    assert actual.equals(expected)


@pytest.mark.parametrize(
    "data,comparator,dataset_type,expected_result",
    [
        (
            {"target": [None, ["A", "B"], ["C", "D"]]},
            "A",
            PandasDataset,
            [True, False, True],
        ),
        (
            {"target": [None, ["A", "B"], ["C", "D"]]},
            "C",
            PandasDataset,
            [True, True, False],
        ),
    ],
)
def test_does_not_contain_with_none_first_row(
    data, comparator, dataset_type, expected_result
):
    """Check ``does_not_contain`` on a list column whose first row is ``None``.

    Per the expected results, the ``None`` row is True and each list row is
    True only when it lacks the literal comparator.
    """
    dataset = dataset_type.from_dict(data)
    operator_params = {
        "target": "target",
        "comparator": comparator,
        "value_is_literal": True,
    }
    actual = DataframeType({"value": dataset}).does_not_contain(operator_params)
    expected = dataset.convert_to_series(expected_result)
    assert actual.equals(expected)


@pytest.mark.parametrize(
    "data,comparator,dataset_type,expected_result",
    [
        (
            {"target": ["A", "B", "C"], "comparison": [None, ["A", "B"], ["C", "D"]]},
            "comparison",
            PandasDataset,
            [False, False, False],
        ),
        (
            {"target": ["A", "B", "C"], "comparison": [["A", "B"], None, ["C", "D"]]},
            "comparison",
            PandasDataset,
            [False, False, False],
        ),
    ],
)
def test_is_contained_by_with_none_in_comparison(
    data, comparator, dataset_type, expected_result
):
    """Check ``is_contained_by`` when the comparison column mixes lists and ``None``.

    Per the expected results, every row is False whenever the comparison
    column contains a ``None`` entry, wherever that entry sits.
    """
    dataset = dataset_type.from_dict(data)
    operator_params = {"target": "target", "comparator": comparator}
    actual = DataframeType({"value": dataset}).is_contained_by(operator_params)
    expected = dataset.convert_to_series(expected_result)
    assert actual.equals(expected)
Loading