@type_operator(FIELD_DATAFRAME)
def contains_all(self, other_value: dict):
    """Evaluate a containment check between the target and the comparator.

    ``other_value`` is read for three keys: ``"target"`` (column name, passed
    through ``replace_prefix``), ``"comparator"`` (column name or list) and
    ``"value_is_literal"`` (defaults to ``False``).

    Two modes exist:

    * Row-wise: taken when both the target column and the column selected by
      the raw comparator satisfy ``is_column_of_iterables``. One boolean is
      produced per row: whether every item of the target row's value passes
      ``is_in(item, comparator_row_value)``.
    * Dataset-wide: otherwise. A single boolean is produced: whether every
      comparator value is present among the unique values of the target
      column. A list comparator is expanded with ``flatten_list``; any other
      comparator is treated as a column name after ``replace_prefix``.

    NOTE(review): assuming ``is_in(item, container)`` tests membership, the
    row-wise mode checks "target items are all within the comparator" while
    the dataset-wide mode checks "comparator values are all within the
    target" - confirm that the two directions are intended.

    Returns:
        Whatever ``self.value.convert_to_series`` builds from the result.
    """
    target = self.replace_prefix(other_value.get("target"))
    value_is_literal: bool = other_value.get("value_is_literal", False)
    comparator = other_value.get("comparator")

    # Looked up once; the column is not modified anywhere below.
    target_column = self.value[target]

    compare_row_by_row = self.is_column_of_iterables(
        target_column
    ) and self.is_column_of_iterables(self.value[comparator])

    if compare_row_by_row:
        comparison_data = self.get_comparator_data(comparator, value_is_literal)
        row_results = []
        # Positional access on both sides keeps rows paired by position,
        # independent of index labels.
        for position in range(len(target_column)):
            members = target_column.iloc[position]
            pool = comparison_data.iloc[position]
            row_results.append(all(is_in(member, pool) for member in members))
        return self.value.convert_to_series(row_results)

    if isinstance(comparator, list):
        # get column as array of values
        required_values = flatten_list(self.value, comparator)
    else:
        comparator = self.replace_prefix(comparator)
        required_values = self.value[comparator].unique()

    required = set(required_values)
    available = set(target_column.unique())
    return self.value.convert_to_series(required.issubset(available))
class SplitBy(BaseOperation):
    """Operation that splits each value of the target column on a delimiter.

    Reads ``self.params.dataframe``, ``self.params.target`` and
    ``self.params.delimiter``.
    """

    def _execute_operation(self):
        """Split every value of the target column on the configured delimiter.

        The delimiter is matched as literal text. When ``delimiter`` is
        ``None``, pandas falls back to splitting on whitespace.

        Returns:
            The result of ``.str.split`` on the target column. With pandas
            this is a Series holding a list of substrings per row.
        """
        # Assumes the target column exposes the pandas ``.str`` accessor
        # (string/object data) - TODO confirm for non-pandas datasets.
        target_column = self.params.dataframe[self.params.target]
        # regex=False: by default pandas compiles any delimiter that is not
        # exactly one character long as a regular expression, so "||" or ".."
        # would not split on the literal text.
        return target_column.str.split(self.params.delimiter, regex=False)
delimiter=operation.get("delimiter"), ) # execute operation