diff --git a/cdisc_rules_engine/dataset_builders/domain_list_with_define_builder.py b/cdisc_rules_engine/dataset_builders/domain_list_with_define_builder.py index 180603763..8ec2741c3 100644 --- a/cdisc_rules_engine/dataset_builders/domain_list_with_define_builder.py +++ b/cdisc_rules_engine/dataset_builders/domain_list_with_define_builder.py @@ -31,11 +31,11 @@ def build(self): records = [] for define_item in all_define_metadata: domain_name = define_item.get("define_dataset_name", "") + filename = domain_files.get(domain_name) record = { - "domain": domain_name, - "filename": domain_files.get(domain_name), **define_item, + "domain": domain_name if filename is not None else None, + "filename": filename, } records.append(record) - return self.dataset_implementation.from_records(records) diff --git a/cdisc_rules_engine/enums/domain_presence_values.py b/cdisc_rules_engine/enums/domain_presence_values.py index bdd5b687c..5e664c05f 100644 --- a/cdisc_rules_engine/enums/domain_presence_values.py +++ b/cdisc_rules_engine/enums/domain_presence_values.py @@ -4,3 +4,4 @@ class DomainPresenceValues(BaseEnum): DATASET = "STUDY" RECORD = "" + DOMAIN = "N/A" diff --git a/cdisc_rules_engine/enums/sensitivity.py b/cdisc_rules_engine/enums/sensitivity.py index 4d2189129..7ff7ea376 100644 --- a/cdisc_rules_engine/enums/sensitivity.py +++ b/cdisc_rules_engine/enums/sensitivity.py @@ -5,3 +5,4 @@ class Sensitivity(BaseEnum): DATASET = "Dataset" RECORD = "Record" GROUP = "Group" + STUDY = "Study" diff --git a/cdisc_rules_engine/models/actions.py b/cdisc_rules_engine/models/actions.py index 331a62ed7..2150d94c6 100644 --- a/cdisc_rules_engine/models/actions.py +++ b/cdisc_rules_engine/models/actions.py @@ -65,6 +65,7 @@ def generate_dataset_error_objects(self, message: str, results: pd.Series): ) if "domain presence" in self.rule.get("rule_type", "").lower(): error_object.dataset = DomainPresenceValues.DATASET.value + error_object.domain = DomainPresenceValues.DOMAIN.value for error in error_object.errors: error.dataset = DomainPresenceValues.DATASET.value error.row = DomainPresenceValues.RECORD.value @@ -177,6 +178,10 @@ def generate_targeted_error_object( # noqa: C901 errors_list = self._generate_errors_by_target_presence( data, targets_not_in_dataset, all_targets_missing, errors_df ) + elif self.rule.get("sensitivity") == Sensitivity.STUDY.value: + errors_list = self._generate_errors_by_target_presence( + data, targets_not_in_dataset, all_targets_missing, errors_df + ) elif self.rule.get("sensitivity") == Sensitivity.GROUP.value: grouping_variables = self.rule.get("grouping_variables", []) diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 05666741d..50f36b009 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -1,6 +1,8 @@ from copy import deepcopy from typing import Iterable, List, Union from dateutil.parser._parser import ParserError +import traceback + from business_rules import export_rule_data from business_rules.engine import run from cdisc_rules_engine.config import config as default_config @@ -58,7 +60,7 @@ ExternalDictionariesContainer, ) from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata -import traceback +from cdisc_rules_engine.enums.sensitivity import Sensitivity class RulesEngine: @@ -174,6 +176,8 @@ def validate_single_rule(self, rule: dict, datasets: Iterable[SDTMDatasetMetadat ) if limit_reached: break + if rule.get("sensitivity") == Sensitivity.STUDY.value: + results = self._collapse_to_study_result(results) return results def _update_total_errors_and_check_limit( @@ -671,3 +675,15 @@ def handle_validation_exceptions( # noqa message=message, status=ExecutionStatus.EXECUTION_ERROR.value, ) + + def _collapse_to_study_result(self, results: dict) -> dict: + """ + For study sensitivity rules, collapse all per-dataset results into a single + study-level result using the first non-skipped result as the representative. + """ + for key, dataset_results in results.items(): + for result in dataset_results: + if result.get("executionStatus") != ExecutionStatus.SKIPPED.value: + return {"study": [result]} + first_key = next(iter(results)) + return {"study": results[first_key]} diff --git a/resources/schema/rule/Rule_Type.md b/resources/schema/rule/Rule_Type.md index 9556730f6..bbf184d8f 100644 --- a/resources/schema/rule/Rule_Type.md +++ b/resources/schema/rule/Rule_Type.md @@ -192,7 +192,7 @@ all: One row per dataset defined in Define-XML: -- `domain` +- `domain` - The domain if the dataset exists, null otherwise - `filename` - The file name if dataset exists, null otherwise - `define_dataset_name` - `define_dataset_label` diff --git a/resources/schema/rule/Sensitivity.json b/resources/schema/rule/Sensitivity.json index 08c68d6bd..ba331d1a6 100644 --- a/resources/schema/rule/Sensitivity.json +++ b/resources/schema/rule/Sensitivity.json @@ -10,6 +10,9 @@ }, { "const": "Group" + }, + { + "const": "Study" } ], "markdownDescription": "Determines what level of granularity issues should be generated within the report" diff --git a/resources/schema/rule/Sensitivity.md b/resources/schema/rule/Sensitivity.md index 3229a521f..45ffb9d92 100644 --- a/resources/schema/rule/Sensitivity.md +++ b/resources/schema/rule/Sensitivity.md @@ -1,3 +1,7 @@ +## Study + +Will report once per submitted data study. Lends itself to Domain Presence Check rule types as these are cross-study checks that do not involve within-dataset checks. Reports one result per failing row in the dataset generated by the rule type, collapsed to a single study-level result. + ## Dataset Report one result per dataset generated by the `Rule Type`, where a dataset or record within the dataset matches the rule failure criteria diff --git a/resources/templates/report-template.xlsx b/resources/templates/report-template.xlsx index 41b3eb81a..227cdd1e7 100644 Binary files a/resources/templates/report-template.xlsx and b/resources/templates/report-template.xlsx differ diff --git a/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py b/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py index 70ccb61c3..3005e31cc 100644 --- a/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py +++ b/tests/unit/test_dataset_builders/test_domain_presence_define_builder.py @@ -199,7 +199,7 @@ ], }, { - "domain": "SE", + "domain": None, "filename": None, "define_dataset_name": "SE", "define_dataset_label": "Subject Elements", @@ -241,7 +241,7 @@ pd.DataFrame( [ { - "domain": "AE", + "domain": None, "filename": None, "define_dataset_name": "AE", "define_dataset_label": "Adverse Events", @@ -260,7 +260,7 @@ ], }, { - "domain": "DM", + "domain": None, "filename": None, "define_dataset_name": "DM", "define_dataset_label": "Demographics", @@ -279,7 +279,7 @@ ], }, { - "domain": "SE", + "domain": None, "filename": None, "define_dataset_name": "SE", "define_dataset_label": "Subject Elements", @@ -293,7 +293,7 @@ "define_dataset_variables": ["STUDYID", "USUBJID", "SESEQ"], }, { - "domain": "EC", + "domain": None, "filename": None, "define_dataset_name": "EC", "define_dataset_label": "Exposure as Collected", @@ -344,7 +344,11 @@ def test_domain_list_with_define_dataset_builder( if expected_results.empty: assert result_df.empty, f"Expected empty DataFrame for {test_description}" else: - assert list(result_df.columns) == list( + assert set(result_df.columns) == set( expected_results.columns ), f"Columns do not match for {test_description}" - pd.testing.assert_frame_equal(result_df, expected_results, check_dtype=False) + pd.testing.assert_frame_equal( + result_df.reindex(columns=expected_results.columns), + expected_results, + check_dtype=False, + ) diff --git a/tests/unit/test_rules_engine.py b/tests/unit/test_rules_engine.py index a068ac9a7..62a4bd040 100644 --- a/tests/unit/test_rules_engine.py +++ b/tests/unit/test_rules_engine.py @@ -1050,7 +1050,7 @@ def test_rule_with_domain_prefix_replacement(mock_get_dataset: MagicMock): { "executionStatus": ExecutionStatus.ISSUE_REPORTED.value, "dataset": "STUDY", - "domain": "AE", + "domain": "N/A", "variables": ["AE"], "message": "Domain AE exists", "errors": [