Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,16 +1048,26 @@ def non_empty_within_except_last_row(self, other_value: dict):
@type_operator(FIELD_DATAFRAME)
def contains_all(self, other_value: dict):
target = self.replace_prefix(other_value.get("target"))
value_is_literal: bool = other_value.get("value_is_literal", False)
comparator = other_value.get("comparator")
if isinstance(comparator, list):
# get column as array of values
values = flatten_list(self.value, comparator)
if self.is_column_of_iterables(
self.value[target]
) and self.is_column_of_iterables(self.value[comparator]):
comparison_data = self.get_comparator_data(comparator, value_is_literal)
results = []
for i in range(len(self.value[target])):
target_val = self.value[target].iloc[i]
comp_val = comparison_data.iloc[i]
results.append(all(is_in(item, target_val) for item in comp_val))
else:
comparator = self.replace_prefix(comparator)
values = self.value[comparator].unique()
return self.value.convert_to_series(
set(values).issubset(set(self.value[target].unique()))
)
if isinstance(comparator, list):
# get column as array of values
values = flatten_list(self.value, comparator)
else:
comparator = self.replace_prefix(comparator)
values = self.value[comparator].unique()
results = set(values).issubset(set(self.value[target].unique()))
return self.value.convert_to_series(results)

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand Down
1 change: 1 addition & 0 deletions cdisc_rules_engine/models/operation_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ class OperationParams:
target: str = None
value_is_reference: bool = False
namespace: str = None
delimiter: str = None
2 changes: 2 additions & 0 deletions cdisc_rules_engine/operations/operations_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from cdisc_rules_engine.operations.min_date import MinDate
from cdisc_rules_engine.operations.minimum import Minimum
from cdisc_rules_engine.operations.record_count import RecordCount
from cdisc_rules_engine.operations.split_by import SplitBy
from cdisc_rules_engine.operations.valid_external_dictionary_code import (
ValidExternalDictionaryCode,
)
Expand Down Expand Up @@ -121,6 +122,7 @@ class OperationsFactory(FactoryInterface):
"domain_is_custom": DomainIsCustom,
"domain_label": DomainLabel,
"required_variables": RequiredVariables,
"split_by": SplitBy,
"expected_variables": ExpectedVariables,
"permissible_variables": PermissibleVariables,
"study_domains": StudyDomains,
Expand Down
13 changes: 13 additions & 0 deletions cdisc_rules_engine/operations/split_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from cdisc_rules_engine.operations.base_operation import BaseOperation


class SplitBy(BaseOperation):
    """Operation that splits each value of the target column on a delimiter."""

    def _execute_operation(self):
        """Split the target column's string values on the configured delimiter.

        Returns a column of lists (one list per row), as produced by
        ``Series.str.split``.

        Raises:
            ValueError: when either ``target`` (the rule's ``name``) or
                ``delimiter`` is missing/empty in the operation params.
        """
        target = self.params.target
        delimiter = self.params.delimiter
        # Both params are mandatory; an empty string is treated as missing.
        if not (target and delimiter):
            raise ValueError(
                f"name and delimiter are required params for operation {self.params.operation_name}"
            )

        return self.evaluation_dataset[target].str.split(delimiter)
1 change: 1 addition & 0 deletions cdisc_rules_engine/utilities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ def perform_rule_operations(
term_pref_term=operation.get("term_pref_term"),
namespace=operation.get("namespace"),
value_is_reference=operation.get("value_is_reference", False),
delimiter=operation.get("delimiter"),
)

# execute operation
Expand Down
7 changes: 7 additions & 0 deletions resources/schema/Operations.json
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,13 @@
"required": ["id", "operator"],
"type": "object"
},
{
"properties": {
"operator": { "const": "split_by" }
},
"required": ["id", "operator", "delimiter", "name"],
"type": "object"
},
{
"properties": {
"operator": { "const": "study_domains" }
Expand Down
12 changes: 12 additions & 0 deletions resources/schema/Operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -1268,3 +1268,15 @@ Operations:
```

Note that a local XSD file is required for validation. The file must be stored in the folder indicated by the value of the `LOCAL_XSD_FILE_DIR` default file path and the mapping between the namespace and the local XSD file's `sub-folder/name` must be included in the value of the `LOCAL_XSD_FILE_MAP` default file path.

### split_by

Splits each string value of a dataset column on the given delimiter, producing a column of lists

```yaml
Operations:
- name: PPSPEC
delimiter: ;
id: $ppspec_value
operator: split_by
```
32 changes: 32 additions & 0 deletions resources/schema/Operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,22 @@ True if all values in `value` are contained within the variable `name`.
- "Unplanned Treatment"
```

The operator also supports row-wise comparison when both `name` and `value` refer to columns whose cells are lists:

```yaml
- name: "$spec_codelist"
operator: "contains_all"
value: "$ppspec_value"
```

Where:

| $spec_codelist | $ppspec_value |
| :-------------------------- | :----------------: |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1", "CODE2"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE2", "CODE3"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1"] |

### not_contains_all

Complement of `contains_all`
Expand All @@ -762,6 +778,22 @@ Complement of `contains_all`
- "Unplanned Treatment"
```

The operator also supports row-wise comparison when both `name` and `value` refer to columns whose cells are lists:

```yaml
- name: "$spec_codelist"
operator: "not_contains_all"
value: "$ppspec_value"
```

Where:

| $spec_codelist | $ppspec_value |
| :-------------------------- | :----------------: |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1", "CODE2"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE2", "CODE3"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1"] |

### shares_at_least_one_element_with

Will raise an issue if at least one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).
Expand Down
162 changes: 162 additions & 0 deletions tests/QARegressionTests/test_Issues/test_CoreIssue890.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import os
import subprocess
import unittest
import openpyxl
import pytest
from conftest import get_python_executable
from QARegressionTests.globals import (
issue_datails_sheet,
rules_report_sheet,
issue_sheet_record_column,
issue_sheet_variable_column,
issue_sheet_values_column,
)


@pytest.mark.regression
class TestColumnConsistsOfDelimitedCodelists(unittest.TestCase):
    """Regression tests for CORE issue 890 (SENDIG rule SEND282).

    Each test runs the CORE CLI against a fixture dataset, then inspects the
    generated ``CORE-Report-*.xlsx`` file. The report is always deleted
    afterwards (even on assertion failure) so repeated runs stay clean.
    """

    RULE_ID = "CDISC.SENDIG.SEND282"

    def _run_validation(self, dataset_filename: str) -> str:
        """Run the CORE validate CLI on a CoreIssue890 fixture dataset.

        Returns the path of the newest ``CORE-Report-*.xlsx`` produced in the
        current working directory.
        """
        command = [
            f"{get_python_executable()}",
            "-m",
            "core",
            "validate",
            "-s",
            "send",
            "-v",
            "1-0",
            "-dp",
            os.path.join("tests", "resources", "CoreIssue890", dataset_filename),
            "-lr",
            os.path.join("tests", "resources", "CoreIssue890", "Rule.yml"),
            "-ct",
            "sendct-2025-09-26",
        ]
        subprocess.run(command, check=True)

        # The report name embeds a timestamp, so the lexicographically
        # greatest file is the most recently created one.
        excel_files = [
            file
            for file in os.listdir()
            if file.startswith("CORE-Report-") and file.endswith(".xlsx")
        ]
        return sorted(excel_files)[-1]

    @staticmethod
    def _non_empty_rows(sheet):
        """Return a sheet's data rows (header skipped) that have any value."""
        rows = [row for row in sheet.iter_rows(values_only=True)][1:]
        return [row for row in rows if any(row)]

    @staticmethod
    def _non_empty_cells(sheet, column):
        """Return a column's non-None cell values, header cell skipped."""
        return [cell.value for cell in sheet[column][1:] if cell.value is not None]

    def test_positive_dataset(self):
        """A conforming dataset must produce no issues for the rule."""
        excel_file_path = self._run_validation(
            "unit-test-coreid-SENDIG282-positive.json"
        )
        try:
            workbook = openpyxl.load_workbook(excel_file_path)

            # "Issue Details" must be empty: no records, variables, or values.
            sheet = workbook[issue_datails_sheet]
            record_values = self._non_empty_cells(sheet, issue_sheet_record_column)
            variables_values = self._non_empty_cells(
                sheet, issue_sheet_variable_column
            )
            values_column_values = self._non_empty_cells(
                sheet, issue_sheet_values_column
            )

            # The rule itself must still appear in the "Rules Report" sheet,
            # proving it was executed (and not skipped) against the dataset.
            rules_values = self._non_empty_rows(workbook[rules_report_sheet])
            assert rules_values[0][0] == self.RULE_ID
            assert len(record_values) == 0
            assert len(variables_values) == 0
            assert len(values_column_values) == 0
        finally:
            # Always delete the report so a failed assertion cannot leak it.
            if os.path.exists(excel_file_path):
                os.remove(excel_file_path)

    def test_negative_dataset(self):
        """A non-conforming dataset must raise exactly two issues.

        NOTE(review): renamed from the original typo'd ``test_negaive_dataset``;
        pytest discovers the method by its ``test_`` prefix either way.
        """
        excel_file_path = self._run_validation(
            "unit-test-coreid-SENDIG282-negative.json"
        )
        try:
            workbook = openpyxl.load_workbook(excel_file_path)

            # --- Dataset Details ---
            dataset_values = self._non_empty_rows(workbook["Dataset Details"])
            assert len(dataset_values) > 0
            assert dataset_values[0][0] == "pp.xpt"
            assert dataset_values[0][1] == "Pharmacokinetics Parameters"
            assert dataset_values[0][-1] == 4

            # --- Issue Summary ---
            summary_values = self._non_empty_rows(workbook["Issue Summary"])
            assert len(summary_values) > 0
            assert summary_values[0][0] == "pp.xpt"
            assert summary_values[0][1] == self.RULE_ID
            assert summary_values[0][3] == 2

            # --- Issue Details ---
            details_values = self._non_empty_rows(workbook["Issue Details"])
            assert all(row[0] == self.RULE_ID for row in details_values)
            assert len(details_values) == 2

            # --- Rules Report ---
            rules_values = self._non_empty_rows(workbook["Rules Report"])
            assert len(rules_values) > 0
            assert rules_values[0][0] == self.RULE_ID
        finally:
            # Always delete the report so a failed assertion cannot leak it.
            if os.path.exists(excel_file_path):
                os.remove(excel_file_path)


# if __name__ == "__main__":
# unittest.main()
Loading
Loading