Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,16 +1048,26 @@ def non_empty_within_except_last_row(self, other_value: dict):
@type_operator(FIELD_DATAFRAME)
def contains_all(self, other_value: dict):
target = self.replace_prefix(other_value.get("target"))
value_is_literal: bool = other_value.get("value_is_literal", False)
comparator = other_value.get("comparator")
if isinstance(comparator, list):
# get column as array of values
values = flatten_list(self.value, comparator)
if self.is_column_of_iterables(
self.value[target]
) and self.is_column_of_iterables(self.value[comparator]):
comparison_data = self.get_comparator_data(comparator, value_is_literal)
results = []
for i in range(len(self.value[target])):
target_val = self.value[target].iloc[i]
comp_val = comparison_data.iloc[i]
results.append(all(is_in(item, target_val) for item in comp_val))
else:
comparator = self.replace_prefix(comparator)
values = self.value[comparator].unique()
return self.value.convert_to_series(
set(values).issubset(set(self.value[target].unique()))
)
if isinstance(comparator, list):
# get column as array of values
values = flatten_list(self.value, comparator)
else:
comparator = self.replace_prefix(comparator)
values = self.value[comparator].unique()
results = set(values).issubset(set(self.value[target].unique()))
return self.value.convert_to_series(results)

@log_operator_execution
@type_operator(FIELD_DATAFRAME)
Expand Down
1 change: 1 addition & 0 deletions cdisc_rules_engine/models/operation_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ class OperationParams:
target: str = None
value_is_reference: bool = False
namespace: str = None
delimiter: str = None
2 changes: 2 additions & 0 deletions cdisc_rules_engine/operations/operations_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from cdisc_rules_engine.operations.min_date import MinDate
from cdisc_rules_engine.operations.minimum import Minimum
from cdisc_rules_engine.operations.record_count import RecordCount
from cdisc_rules_engine.operations.split_by import SplitBy
from cdisc_rules_engine.operations.valid_external_dictionary_code import (
ValidExternalDictionaryCode,
)
Expand Down Expand Up @@ -121,6 +122,7 @@ class OperationsFactory(FactoryInterface):
"domain_is_custom": DomainIsCustom,
"domain_label": DomainLabel,
"required_variables": RequiredVariables,
"split_by": SplitBy,
"expected_variables": ExpectedVariables,
"permissible_variables": PermissibleVariables,
"study_domains": StudyDomains,
Expand Down
13 changes: 13 additions & 0 deletions cdisc_rules_engine/operations/split_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from cdisc_rules_engine.operations.base_operation import BaseOperation


class SplitBy(BaseOperation):
    """Operation that splits each value of the target column on a delimiter."""

    def _execute_operation(self):
        """Split the target column's string values on the configured delimiter.

        Returns a column of lists (one list per row), as produced by
        ``Series.str.split``.

        Raises:
            ValueError: when either ``target`` (the rule's ``name``) or
                ``delimiter`` is missing/empty in the operation params.
        """
        target = self.params.target
        delimiter = self.params.delimiter
        # Both params are mandatory; an empty string is treated as missing.
        if not (target and delimiter):
            raise ValueError(
                f"name and delimiter are required params for operation {self.params.operation_name}"
            )

        return self.evaluation_dataset[target].str.split(delimiter)
1 change: 1 addition & 0 deletions cdisc_rules_engine/utilities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ def perform_rule_operations(
term_pref_term=operation.get("term_pref_term"),
namespace=operation.get("namespace"),
value_is_reference=operation.get("value_is_reference", False),
delimiter=operation.get("delimiter"),
)

# execute operation
Expand Down
7 changes: 7 additions & 0 deletions resources/schema/Operations.json
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,13 @@
"required": ["id", "operator"],
"type": "object"
},
{
"properties": {
"operator": { "const": "split_by" }
},
"required": ["id", "operator", "delimiter", "name"],
"type": "object"
},
{
"properties": {
"operator": { "const": "study_domains" }
Expand Down
12 changes: 12 additions & 0 deletions resources/schema/Operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -1268,3 +1268,15 @@ Operations:
```

Note that a local XSD file is required for validation. The file must be stored in the folder indicated by the value of the `LOCAL_XSD_FILE_DIR` default file path and the mapping between the namespace and the local XSD file's `sub-folder/name` must be included in the value of the `LOCAL_XSD_FILE_MAP` default file path.

### split_by

Splits each string value of a dataset column on the given delimiter, producing a column of lists

```yaml
Operations:
- name: PPSPEC
delimiter: ;
id: $ppspec_value
operator: split_by
```
32 changes: 32 additions & 0 deletions resources/schema/Operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,22 @@ True if all values in `value` are contained within the variable `name`.
- "Unplanned Treatment"
```

The operator also supports row-wise comparison when both `name` and `value` refer to columns whose cells are lists:

```yaml
- name: "$spec_codelist"
operator: "contains_all"
value: "$ppspec_value"
```

Where:

| $spec_codelist | $ppspec_value |
| :-------------------------- | :----------------: |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1", "CODE2"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE2", "CODE3"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1"] |

### not_contains_all

Complement of `contains_all`
Expand All @@ -762,6 +778,22 @@ Complement of `contains_all`
- "Unplanned Treatment"
```

The operator also supports row-wise comparison when both `name` and `value` refer to columns whose cells are lists:

```yaml
- name: "$spec_codelist"
operator: "not_contains_all"
value: "$ppspec_value"
```

Where:

| $spec_codelist | $ppspec_value |
| :-------------------------- | :----------------: |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1", "CODE2"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE2", "CODE3"] |
| ["CODE1", "CODE2", "CODE3"] | ["CODE1"] |

### shares_at_least_one_element_with

Will raise an issue if at least one of the values in `name` is the same as one of the values in `value`. See [shares_no_elements_with](#shares_no_elements_with).
Expand Down
162 changes: 162 additions & 0 deletions tests/QARegressionTests/test_Issues/test_CoreIssue890.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import os
import subprocess
import unittest
import openpyxl
import pytest
from conftest import get_python_executable
from QARegressionTests.globals import (
issue_datails_sheet,
rules_report_sheet,
issue_sheet_record_column,
issue_sheet_variable_column,
issue_sheet_values_column,
)


@pytest.mark.regression
class TestColumnConsistsOfDelimitedCodelists(unittest.TestCase):
    """Regression tests for CORE issue 890 (SENDIG rule SEND282).

    Each test runs the CORE CLI against a fixture dataset, then inspects the
    generated ``CORE-Report-*.xlsx`` file. The report is always deleted
    afterwards (even on assertion failure) so repeated runs stay clean.
    """

    RULE_ID = "CDISC.SENDIG.SEND282"

    def _run_validation(self, dataset_filename: str) -> str:
        """Run the CORE validate CLI on a CoreIssue890 fixture dataset.

        Returns the path of the newest ``CORE-Report-*.xlsx`` produced in the
        current working directory.
        """
        command = [
            f"{get_python_executable()}",
            "-m",
            "core",
            "validate",
            "-s",
            "send",
            "-v",
            "1-0",
            "-dp",
            os.path.join("tests", "resources", "CoreIssue890", dataset_filename),
            "-lr",
            os.path.join("tests", "resources", "CoreIssue890", "Rule.yml"),
            "-ct",
            "sendct-2025-09-26",
        ]
        subprocess.run(command, check=True)

        # The report name embeds a timestamp, so the lexicographically
        # greatest file is the most recently created one.
        excel_files = [
            file
            for file in os.listdir()
            if file.startswith("CORE-Report-") and file.endswith(".xlsx")
        ]
        return sorted(excel_files)[-1]

    @staticmethod
    def _non_empty_rows(sheet):
        """Return a sheet's data rows (header skipped) that have any value."""
        rows = [row for row in sheet.iter_rows(values_only=True)][1:]
        return [row for row in rows if any(row)]

    @staticmethod
    def _non_empty_cells(sheet, column):
        """Return a column's non-None cell values, header cell skipped."""
        return [cell.value for cell in sheet[column][1:] if cell.value is not None]

    def test_positive_dataset(self):
        """A conforming dataset must produce no issues for the rule."""
        excel_file_path = self._run_validation(
            "unit-test-coreid-SENDIG282-positive.json"
        )
        try:
            workbook = openpyxl.load_workbook(excel_file_path)

            # "Issue Details" must be empty: no records, variables, or values.
            sheet = workbook[issue_datails_sheet]
            record_values = self._non_empty_cells(sheet, issue_sheet_record_column)
            variables_values = self._non_empty_cells(
                sheet, issue_sheet_variable_column
            )
            values_column_values = self._non_empty_cells(
                sheet, issue_sheet_values_column
            )

            # The rule itself must still appear in the "Rules Report" sheet,
            # proving it was executed (and not skipped) against the dataset.
            rules_values = self._non_empty_rows(workbook[rules_report_sheet])
            assert rules_values[0][0] == self.RULE_ID
            assert len(record_values) == 0
            assert len(variables_values) == 0
            assert len(values_column_values) == 0
        finally:
            # Always delete the report so a failed assertion cannot leak it.
            if os.path.exists(excel_file_path):
                os.remove(excel_file_path)

    def test_negative_dataset(self):
        """A non-conforming dataset must raise exactly two issues.

        NOTE(review): renamed from the original typo'd ``test_negaive_dataset``;
        pytest discovers the method by its ``test_`` prefix either way.
        """
        excel_file_path = self._run_validation(
            "unit-test-coreid-SENDIG282-negative.json"
        )
        try:
            workbook = openpyxl.load_workbook(excel_file_path)

            # --- Dataset Details ---
            dataset_values = self._non_empty_rows(workbook["Dataset Details"])
            assert len(dataset_values) > 0
            assert dataset_values[0][0] == "pp.xpt"
            assert dataset_values[0][1] == "Pharmacokinetics Parameters"
            assert dataset_values[0][-1] == 4

            # --- Issue Summary ---
            summary_values = self._non_empty_rows(workbook["Issue Summary"])
            assert len(summary_values) > 0
            assert summary_values[0][0] == "pp.xpt"
            assert summary_values[0][1] == self.RULE_ID
            assert summary_values[0][3] == 2

            # --- Issue Details ---
            details_values = self._non_empty_rows(workbook["Issue Details"])
            assert all(row[0] == self.RULE_ID for row in details_values)
            assert len(details_values) == 2

            # --- Rules Report ---
            rules_values = self._non_empty_rows(workbook["Rules Report"])
            assert len(rules_values) > 0
            assert rules_values[0][0] == self.RULE_ID
        finally:
            # Always delete the report so a failed assertion cannot leak it.
            if os.path.exists(excel_file_path):
                os.remove(excel_file_path)


# if __name__ == "__main__":
# unittest.main()
Loading
Loading