@pytest.mark.regression
class TestCoreIssue720(unittest.TestCase):
    """End-to-end regression tests for CORE issue 720.

    Both tests run the ``core validate`` CLI with the CoreIssue720 rule and a
    JSON dataset fixture, then inspect the generated ``CORE-Report-*.xlsx``
    workbook. The rule's operations reference the TS and TX domains; the
    tests expect the rule to finish with a ``SUCCESS`` status in the Rules
    Report in both scenarios.
    """

    def _list_reports(self):
        """Return the set of CORE report file names in the working directory."""
        return {
            name
            for name in os.listdir()
            if name.startswith("CORE-Report-") and name.endswith(".xlsx")
        }

    @staticmethod
    def _remove_report(path):
        """Delete the report at *path* if it still exists."""
        if os.path.exists(path):
            os.remove(path)

    def _run_validation(self, datasets_file):
        """Run the validation CLI and return the opened report workbook.

        Args:
            datasets_file: File name of the dataset fixture inside
                ``tests/resources/CoreIssue720``.

        Returns:
            The ``openpyxl`` workbook loaded from the generated report.

        Raises:
            subprocess.CalledProcessError: If the CLI exits with a non-zero
                status.
            AssertionError: If no report file is found after the run.
        """
        reports_before = self._list_reports()
        command = [
            f"{get_python_executable()}",
            "-m",
            "core",
            "validate",
            "-s",
            "usdm",
            "-v",
            "4-0",
            "-dp",
            os.path.join("tests", "resources", "CoreIssue720", datasets_file),
            "-lr",
            os.path.join("tests", "resources", "CoreIssue720", "Rule.yml"),
        ]
        subprocess.run(command, check=True)

        reports_after = self._list_reports()
        assert reports_after, "No CORE report generated"
        # Prefer a report created by this run so that a stale file left over
        # from an earlier (failed) run is not inspected by mistake; fall back
        # to the latest name if the run overwrote an existing file.
        candidates = (reports_after - reports_before) or reports_after
        excel_file_path = sorted(candidates)[-1]
        # Cleanups run in reverse order of registration, even when an
        # assertion fails: the workbook is closed first, then the file removed.
        self.addCleanup(self._remove_report, excel_file_path)
        workbook = openpyxl.load_workbook(excel_file_path)
        self.addCleanup(workbook.close)
        return workbook

    @staticmethod
    def _data_rows(workbook, sheet_name):
        """Return the non-empty rows of *sheet_name*, skipping the header row.

        A row is kept when at least one of its cell values is truthy.
        """
        rows = list(workbook[sheet_name].iter_rows(values_only=True))[1:]
        return [row for row in rows if any(row)]

    def test_negative_dataset(self):
        """Negative scenario: SPECIES missing -> expect one populated issue."""
        wb = self._run_validation("Invalid_datasets.json")

        # Conformance Details
        assert "Conformance Details" in wb.sheetnames
        conf_rows = list(wb["Conformance Details"].iter_rows(values_only=True))
        assert conf_rows[6][0] == "Standard"
        assert conf_rows[6][1] == "USDM"
        assert conf_rows[7][0] == "Version"
        assert conf_rows[7][1] == "V4.0"

        # Entity Details
        entity_rows = self._data_rows(wb, "Entity Details")
        assert len(entity_rows) >= 2
        assert entity_rows[0][0] == "DM" and entity_rows[0][1] == 4
        assert entity_rows[1][0] == "TS" and entity_rows[1][1] == 4

        # Issue Summary
        summary_rows = self._data_rows(wb, "Issue Summary")
        assert summary_rows[0][0] == "dm.xpt"
        assert summary_rows[0][1] == "CDISC.SENDIG.105"
        assert summary_rows[0][3] == 1

        # Issue Details
        details_rows = self._data_rows(wb, issue_datails_sheet)
        assert details_rows, "Issue Details should have at least one populated row"
        first_issue = details_rows[0]
        assert first_issue[0] == "CDISC.SENDIG.105"
        assert first_issue[1] == "SEND105"
        assert "SPECIES in not present" in first_issue[2]
        assert first_issue[3] == "fully executable"
        assert first_issue[4] == "dm.xpt"
        assert first_issue[7] == "$distinct_tsparmcd, $distinct_txparmcd, SPECIES"
        assert "Not in dataset" in first_issue[8]

        # Rules Report
        rules_rows = self._data_rows(wb, rules_report_sheet)
        assert rules_rows, "Rules Report must have at least one populated row"
        assert rules_rows[0][0] == "CDISC.SENDIG.105"
        assert rules_rows[0][-1] == "SUCCESS"

    def test_positive_dataset(self):
        """Positive scenario: SPECIES present -> expect no issue rows."""
        wb = self._run_validation("Valid_datasets.json")

        # Issue Summary empty
        assert self._data_rows(wb, "Issue Summary") == []

        # Issue Details empty
        assert self._data_rows(wb, issue_datails_sheet) == []

        # Rules Report has success row
        rules_rows = self._data_rows(wb, rules_report_sheet)
        assert len(rules_rows) == 1
        assert rules_rows[0][0] == "CDISC.SENDIG.105"
        assert rules_rows[0][-1] == "SUCCESS"
"STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 12 + }, + { + "name": "DOMAIN", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "USUBJID", + "label": "Unique Subject Identifier", + "type": "Char", + "length": 20 + }, + { + "name": "SUBJID", + "label": "Subject Identifier for the Study", + "type": "Char", + "length": 12 + }, + { + "name": "RFSTDTC", + "label": "Subject Reference Start Date/Time", + "type": "Char", + "length": 10 + }, + { + "name": "RFENDTC", + "label": "Subject Reference End Date/Time", + "type": "Char", + "length": 10 + }, + { + "name": "RFXSTDTC", + "label": "Date/Time of First Study Treatment", + "type": "Char", + "length": 10 + }, + { + "name": "RFXENDTC", + "label": "Date/Time of Last Study Treatment", + "type": "Char", + "length": 10 + }, + { + "name": "ARMCD", + "label": "Planned Arm Code", + "type": "Char", + "length": 8 + }, + { + "name": "ARM", + "label": "Description of Planned Arm", + "type": "Char", + "length": 28 + }, + { + "name": "SETCD", + "label": "Set Code", + "type": "Char", + "length": 8 + } + ], + "records": { + "STUDYID": ["CDISCCORE01", "CDISCCORE01", "CDISCCORE01", "CDISCCORE01"], + "DOMAIN": ["DM", "DM", "DM", "DM"], + "USUBJID": [ + "015246-099-0000-00000", + "015246-099-0000-00001", + "015246-099-0000-00002", + "015246-099-0000-00003" + ], + "SUBJID": [ + "099000000000", + "099000000001", + "099000000002", + "099000000003" + ], + "RFSTDTC": ["", "", "", "2018-05-08T11:20"], + "RFENDTC": ["", "", "", "2018-08-29T10:35"], + "RFXSTDTC": ["", "", "", "2018-05-08T11:20"], + "RFXENDTC": ["", "", "", "2018-08-29T10:35"], + "ARMCD": ["", "", "", "IKD"], + "ARM": ["IKd", "", "", "IKd"], + "SETCD": ["SET1", "SET22", "SET333", "SET12345"] + } + }, + { + "filename": "ts.xpt", + "label": "Trial Summary", + "domain": "TS", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 12 + }, + { + "name": "DOMAIN", + "label": "Domain 
Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "TSSEQ", + "label": "Sequence Number ", + "type": "Char", + "length": 8 + }, + { + "name": "TSGRPID", + "label": "Group Identifier", + "type": "Char", + "length": 8 + }, + { + "name": "TSPARMCD", + "label": "Trial Summary Parameter Short Name ", + "type": "Char", + "length": 8 + } + ], + "records": { + "STUDYID": ["CDISCCORE01", "CDISCCORE01", "CDISCCORE01", "CDISCCORE01"], + "DOMAIN": ["TS", "TS", "TS", "TS"], + "TSSEQ": ["1", "2", "3", "4"], + "TSGRPID": ["", "", "", ""], + "TSPARMCD": ["SSTYP", "SPECIESS", "ROUTE", "STITLE"] + } + } + ], + "standard": { + "product": "sendig", + "version": "3-1" + }, + "codelists": [] +} diff --git a/tests/resources/CoreIssue720/Rule.yml b/tests/resources/CoreIssue720/Rule.yml new file mode 100644 index 000000000..832e63d3b --- /dev/null +++ b/tests/resources/CoreIssue720/Rule.yml @@ -0,0 +1,66 @@ +Authorities: + - Organization: CDISC + Standards: + - Name: SENDIG + References: + - Citations: + - Cited Guidance: + If this variable is excluded in the DM domain, the information + must be present at a higher level (either Trial Sets or Trial + Summary). 
+ Document: IG v3.1 + Item: Specification + Section: 5.1.1 + Origin: SEND Conformance Rules + Rule Identifier: + Id: "SEND105" + Version: "2" + Version: "5.0" + Version: "3.1" +Check: + all: + - name: $distinct_tsparmcd + operator: does_not_contain + value: SPECIES + value_is_literal: true + - name: $distinct_txparmcd + operator: does_not_contain + value: SPECIES + value_is_literal: true + - name: SPECIES + operator: not_exists +Core: + Id: CDISC.SENDIG.105 + Status: Draft + Version: "1" +Description: Raise an error when the variable SPECIES is not present in DM, and + there is no record TSPARMCD=SPECIES in TS and there is no record + TXPARMCD=SPECIES in TX +Executability: Fully Executable +Operations: + # gets the distinct values of TSPARMCD + - domain: TS + id: $distinct_tsparmcd + name: TSPARMCD + operator: distinct + - domain: TX + id: $distinct_txparmcd + name: TXPARMCD + operator: distinct +Outcome: + Message: + SPECIES in not present in DM and there is no record TSPARMCD=SPECIES in + TS and there is no record TXPARMCD=SPECIES in TX + Output Variables: + - $distinct_tsparmcd + - $distinct_txparmcd + - SPECIES +Rule Type: Record Data +Scope: + Classes: + Include: + - ALL + Domains: + Include: + - DM +Sensitivity: Dataset diff --git a/tests/resources/CoreIssue720/Valid_datasets.json b/tests/resources/CoreIssue720/Valid_datasets.json new file mode 100644 index 000000000..a6617d702 --- /dev/null +++ b/tests/resources/CoreIssue720/Valid_datasets.json @@ -0,0 +1,149 @@ +{ + "datasets": [ + { + "filename": "dm.xpt", + "label": "Demographics", + "domain": "DM", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 12 + }, + { + "name": "DOMAIN", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "USUBJID", + "label": "Unique Subject Identifier", + "type": "Char", + "length": 20 + }, + { + "name": "SUBJID", + "label": "Subject Identifier for the Study", + "type": "Char", + "length": 12 
+ }, + { + "name": "RFSTDTC", + "label": "Subject Reference Start Date/Time", + "type": "Char", + "length": 10 + }, + { + "name": "RFENDTC", + "label": "Subject Reference End Date/Time", + "type": "Char", + "length": 10 + }, + { + "name": "RFXSTDTC", + "label": "Date/Time of First Study Treatment", + "type": "Char", + "length": 10 + }, + { + "name": "RFXENDTC", + "label": "Date/Time of Last Study Treatment", + "type": "Char", + "length": 10 + }, + { + "name": "ARMCD", + "label": "Planned Arm Code", + "type": "Char", + "length": 8 + }, + { + "name": "ARM", + "label": "Description of Planned Arm", + "type": "Char", + "length": 28 + }, + { + "name": "SETCD", + "label": "Set Code", + "type": "Char", + "length": 8 + } + ], + "records": { + "STUDYID": ["CDISCCORE01", "CDISCCORE01", "CDISCCORE01", "CDISCCORE01"], + "DOMAIN": ["DM", "DM", "DM", "DM"], + "USUBJID": [ + "015246-099-0000-00000", + "015246-099-0000-00001", + "015246-099-0000-00002", + "015246-099-0000-00003" + ], + "SUBJID": [ + "099000000000", + "099000000001", + "099000000002", + "099000000003" + ], + "RFSTDTC": ["", "", "", "2018-05-08T11:20"], + "RFENDTC": ["", "", "", "2018-08-29T10:35"], + "RFXSTDTC": ["", "", "", "2018-05-08T11:20"], + "RFXENDTC": ["", "", "", "2018-08-29T10:35"], + "ARMCD": ["", "", "", "IKD"], + "ARM": ["IKd", "", "", "IKd"], + "SETCD": ["SET1", "SET22", "SET333", "SET12345"] + } + }, + { + "filename": "ts.xpt", + "label": "Trial Summary", + "domain": "TS", + "variables": [ + { + "name": "STUDYID", + "label": "Study Identifier", + "type": "Char", + "length": 12 + }, + { + "name": "DOMAIN", + "label": "Domain Abbreviation", + "type": "Char", + "length": 2 + }, + { + "name": "TSSEQ", + "label": "Sequence Number ", + "type": "Char", + "length": 8 + }, + { + "name": "TSGRPID", + "label": "Group Identifier", + "type": "Char", + "length": 8 + }, + { + "name": "TSPARMCD", + "label": "Trial Summary Parameter Short Name ", + "type": "Char", + "length": 8 + } + ], + "records": { + "STUDYID": 
def test_operation_nonexistent_domain_sets_none(mock_data_service, caplog):
    """Check that an operation on an absent domain yields an all-null column.

    The rule requests a ``distinct`` operation on domain AE while only LB is
    supplied. The result must contain the operation id as a new column filled
    with nulls, keep the original data intact, and a "Domain AE ... doesn't
    exist" message must be logged.
    """
    caplog.set_level("INFO")

    missing_domain_operation = {
        "operator": "distinct",
        "domain": "AE",
        "name": "AESEQ",
        "id": "$ae_ids",
    }
    lb_dataset = PandasDataset.from_dict({"DOMAIN": ["LB", "LB"], "LBSEQ": [1, 2]})
    # Metadata must be SDTMDatasetMetadata instances; per the original note,
    # plain dicts caused "AttributeError: unsplit_name".
    lb_metadata = SDTMDatasetMetadata(
        name="LB", filename="lb.xpt", first_record={"DOMAIN": "LB"}
    )
    rule_processor = RuleProcessor(mock_data_service, InMemoryCacheService())

    outcome = rule_processor.perform_rule_operations(
        rule={"operations": [missing_domain_operation]},
        dataset=lb_dataset.copy(),
        domain="LB",
        datasets=[lb_metadata],
        dataset_path="lb.xpt",
        standard="sdtmig",
        standard_version="3-1-2",
        standard_substandard=None,
    )

    # The operation id shows up as a new column ...
    assert "$ae_ids" in outcome.columns
    # ... holding nothing but nulls ...
    assert outcome["$ae_ids"].isnull().all()
    # ... while the pre-existing column is untouched.
    assert outcome["LBSEQ"].tolist() == [1, 2]

    # The missing domain must have been reported in the log.
    logged_messages = [record.message for record in caplog.records]
    assert any(
        "Domain AE" in message and "doesn't exist" in message
        for message in logged_messages
    )