def _get_define_contents(self):
    """
    Fetch the Define-XML contents through the data service.

    The explicitly configured ``params.define_xml_path`` is used when it
    is set (truthy); otherwise the file named ``DEFINE_XML_FILE_NAME``
    inside ``params.directory_path`` is used.

    Returns:
        Whatever ``data_service.get_define_xml_contents`` returns for the
        resolved path.

    Raises:
        FileNotFoundError: the resolved path does not exist according to
            ``os.path.exists``.
    """
    params = self.params
    # An explicit define.xml location wins over the dataset directory.
    define_path = params.define_xml_path or os.path.join(
        params.directory_path, DEFINE_XML_FILE_NAME
    )
    if os.path.exists(define_path):
        return self.data_service.get_define_xml_contents(dataset_name=define_path)
    raise FileNotFoundError(f"Define XML file {define_path} not found")
+7,6 @@ ) from .base_operation import BaseOperation from cdisc_rules_engine.exceptions.custom_exceptions import UnsupportedDictionaryType -import os class DefineDictionaryVersionValidator(BaseOperation): @@ -36,9 +34,7 @@ def _execute_operation(self) -> bool: whodrug_path=self.params.whodrug_path, loinc_path=self.params.loinc_path, ) - define_contents = self.data_service.get_define_xml_contents( - dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) - ) + define_contents = self._get_define_contents() define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) define_dictionary_version = define_reader.get_external_dictionary_version( self.params.external_dictionary_type diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py index ab5e73eea..c6312cb82 100644 --- a/cdisc_rules_engine/operations/define_variable_metadata.py +++ b/cdisc_rules_engine/operations/define_variable_metadata.py @@ -1,9 +1,7 @@ -from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( DefineXMLReaderFactory, ) from .base_operation import BaseOperation -import os class DefineVariableMetadata(BaseOperation): @@ -32,9 +30,7 @@ def _execute_operation(self): ... 
} """ - define_contents = self.data_service.get_define_xml_contents( - dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME) - ) + define_contents = self._get_define_contents() define_reader = DefineXMLReaderFactory.from_file_contents(define_contents) variables_metadata = define_reader.extract_variables_metadata( self.params.domain diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index 2d69de749..6a469f28a 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -399,6 +399,7 @@ def execute_rule( standard_substandard=self.standard_substandard, external_dictionaries=self.external_dictionaries, ct_packages=ct_packages, + define_xml_path=self.define_xml_path, ) dataset_variable = DatasetVariable( dataset, diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 407f67cb7..316cab635 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -234,11 +234,12 @@ def rule_applies_to_class( excluded_classes = classes.get("Exclude", []) is_included = True is_excluded = False + dataset_name = dataset_metadata.full_path if included_classes: if ALL_KEYWORD in included_classes: return True variables = self.data_service.get_variables_metadata( - dataset_name=dataset_metadata.full_path, datasets=datasets + dataset_name=dataset_name, datasets=datasets ).data.variable_name class_name = self.data_service.get_dataset_class( variables, @@ -252,7 +253,7 @@ def rule_applies_to_class( is_included = False if excluded_classes: variables = self.data_service.get_variables_metadata( - dataset_name=dataset_metadata.full_path, datasets=datasets + dataset_name=dataset_name, datasets=datasets ).data.variable_name class_name = self.data_service.get_dataset_class( variables, @@ -367,6 +368,7 @@ def perform_rule_operations( for ct_package_type in operation.get("ct_package_types", []) ], 
import os
import subprocess

import pytest
import json
from conftest import get_python_executable


def _resource(*parts):
    """Return the path of a fixture below ``tests/resources/CoreIssue1248``."""
    return os.path.join("tests", "resources", "CoreIssue1248", *parts)


def _validate_command(*extra_args):
    """
    Build the ``core validate`` command line shared by every scenario.

    ``extra_args`` are appended verbatim after the common options, so each
    scenario spells out only its dataset / define.xml arguments and keeps
    its own argument order.
    """
    return [
        f"{get_python_executable()}",
        "-m",
        "core",
        "validate",
        "-s",
        "sdtmig",
        "-v",
        "3-2",
        "-of",
        "JSON",
        "-lr",
        _resource("sample.yml"),
        "-cs",
        *extra_args,
    ]


def _rules_report(status):
    """Return the expected ``Rules_Report`` section for rule SD1129."""
    return [
        {
            "core_id": "SD1129",
            "version": "1",
            "cdisc_rule_id": "",
            "fda_rule_id": "",
            "message": "TEST",
            "status": status,
        }
    ]


@pytest.mark.regression
class TestCoreIssue1248:
    """Regression tests for how the CLI locates define.xml (``-dxp`` option)."""

    @pytest.mark.parametrize(
        "command,rules_report,num_issues",
        [
            # An explicit define.xml path (-dxp) leads to a successful
            # execution of the rule.
            (
                _validate_command(
                    "-dxp",
                    _resource("define_subfolder", "define.xml"),
                    "-ps",
                    "1",
                    "-dp",
                    _resource("data.xlsx"),
                ),
                _rules_report("ISSUE REPORTED"),
                2,
            ),
            # JSON data file, no define.xml in the same folder and no -dxp
            # option: the rule ends with an execution error.
            (
                _validate_command("-dp", _resource("relrec.json"), "-ps", "1"),
                _rules_report("EXECUTION ERROR"),
                1,
            ),
            # No define.xml next to data.xlsx and no -dxp option: the rule
            # ends with an execution error.
            (
                _validate_command("-dp", _resource("data.xlsx"), "-ps", "1"),
                _rules_report("EXECUTION ERROR"),
                1,
            ),
            # define.xml sits in the same folder as data.xlsx and no -dxp is
            # given: still an execution error for now. Per the original note
            # this is expected until ExcelDataService creates its dataset
            # metadata with full_path=dataset_name.
            (
                _validate_command(
                    "-dp", _resource("data_and_define", "data.xlsx"), "-ps", "1"
                ),
                _rules_report("EXECUTION ERROR"),
                1,
            ),
        ],
    )
    def test_define_path_used(self, command, rules_report, num_issues):
        """
        Run the CLI and compare the generated JSON report with expectations.

        The report is looked up in the current working directory and is
        always removed afterwards, so a failing scenario cannot leave a
        stale report behind for the next one to pick up.
        """
        subprocess.run(command, check=True)

        # Pick the report whose name sorts last.
        # NOTE(review): assumes the report file name embeds a sortable
        # timestamp, so the last name is the most recent report - confirm.
        report_names = [
            name
            for name in os.listdir()
            if name.startswith("CORE-Report-") and name.endswith(".json")
        ]
        assert report_names, "no CORE-Report-*.json file was produced"
        json_report_path = max(report_names)

        try:
            # Context manager closes the handle even if parsing fails.
            with open(json_report_path) as report_file:
                json_report = json.load(report_file)
            assert {
                "Conformance_Details",
                "Dataset_Details",
                "Issue_Summary",
                "Issue_Details",
                "Rules_Report",
            }.issubset(json_report.keys())
            assert len(json_report["Issue_Details"]) == num_issues
            assert json_report["Rules_Report"] == rules_report
        finally:
            # Clean up regardless of the assertion outcome.
            if os.path.exists(json_report_path):
                os.remove(json_report_path)
files /dev/null and b/tests/resources/CoreIssue1248/data.xlsx differ diff --git a/tests/resources/CoreIssue1248/data_and_define/data.xlsx b/tests/resources/CoreIssue1248/data_and_define/data.xlsx new file mode 100644 index 000000000..c994860b4 Binary files /dev/null and b/tests/resources/CoreIssue1248/data_and_define/data.xlsx differ diff --git a/tests/resources/CoreIssue1248/data_and_define/define.xml b/tests/resources/CoreIssue1248/data_and_define/define.xml new file mode 100644 index 000000000..50bc56b7b --- /dev/null +++ b/tests/resources/CoreIssue1248/data_and_define/define.xml @@ -0,0 +1,146 @@ + + + + + CDISCPILOT01 + Study Data Tabulation Model Metadata Submission Guidelines Sample Study + CDISCPILOT01 + + + + + + + + + + + + + + Related Records + + + + + + + + + + + relrec.xpt + + + + + Study Identifier + + + + + + Related Domain Abbreviation + + + + + + + Unique Subject Identifier + + + + + + Identifying Variable + + + + + + Identifying Variable Value + + + + + + Relationship Type + + + + + + + Relationship Identifier + + + + + + + Adverse Events + + + + + + Disposition + + + + + + Death Details + + + + + + Findings About Events or Interventions + + + + + + + + + Many + + + + + + One + + + + + + + Annotated CRF + + + Reviewers Guide + + + + \ No newline at end of file diff --git a/tests/resources/CoreIssue1248/define_subfolder/define.xml b/tests/resources/CoreIssue1248/define_subfolder/define.xml new file mode 100644 index 000000000..50bc56b7b --- /dev/null +++ b/tests/resources/CoreIssue1248/define_subfolder/define.xml @@ -0,0 +1,146 @@ + + + + + CDISCPILOT01 + Study Data Tabulation Model Metadata Submission Guidelines Sample Study + CDISCPILOT01 + + + + + + + + + + + + + + Related Records + + + + + + + + + + + relrec.xpt + + + + + Study Identifier + + + + + + Related Domain Abbreviation + + + + + + + Unique Subject Identifier + + + + + + Identifying Variable + + + + + + Identifying Variable Value + + + + + + Relationship Type + + + + + + + Relationship 
Identifier + + + + + + + Adverse Events + + + + + + Disposition + + + + + + Death Details + + + + + + Findings About Events or Interventions + + + + + + + + + Many + + + + + + One + + + + + + + Annotated CRF + + + Reviewers Guide + + + + \ No newline at end of file diff --git a/tests/resources/CoreIssue1248/relrec.json b/tests/resources/CoreIssue1248/relrec.json new file mode 100644 index 000000000..e26b186cc --- /dev/null +++ b/tests/resources/CoreIssue1248/relrec.json @@ -0,0 +1,83 @@ +{ + "datasetJSONCreationDateTime": "2024-11-11T15:09:17", + "datasetJSONVersion": "1.1.0", + "fileOID": "www.cdisc.org/StudyMSGv2/1/Define-XML_2.1.0/2024-11-11/relrec", + "dbLastModifiedDateTime": "2020-08-21T09:14:25", + "originator": "CDISC SDTM MSG Team", + "sourceSystem": { + "name": "SAS on X64_10PRO", + "version": "9.0401M7" + }, + "studyOID": "cdisc.com/CDISCPILOT01", + "metaDataVersionOID": "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7", + "metaDataRef": "define.xml", + "itemGroupOID": "IG.RELREC", + "records": 6, + "name": "RELREC", + "label": "Related Records", + "columns": [ + { + "itemOID": "IT.RELREC.STUDYID", + "name": "STUDYID", + "label": "Study Identifier", + "dataType": "string", + "length": 12, + "keySequence": 1 + }, + { + "itemOID": "IT.RELREC.RDOMAIN", + "name": "RDOMAIN", + "label": "Related Domain Abbreviation", + "dataType": "string", + "length": 6, + "keySequence": 2 + }, + { + "itemOID": "IT.RELREC.USUBJID", + "name": "USUBJID", + "label": "Unique Subject Identifier", + "dataType": "string", + "length": 8, + "keySequence": 3 + }, + { + "itemOID": "IT.RELREC.IDVAR", + "name": "IDVAR", + "label": "Identifying Variable", + "dataType": "string", + "length": 200, + "keySequence": 4 + }, + { + "itemOID": "IT.RELREC.IDVARVAL", + "name": "IDVARVAL", + "label": "Identifying Variable Value", + "dataType": "string", + "length": 200, + "keySequence": 5 + }, + { + "itemOID": "IT.RELREC.RELTYPE", + "name": "RELTYPE", + "label": "Relationship Type", + "dataType": "string", + 
"length": 4 + }, + { + "itemOID": "IT.RELREC.RELID", + "name": "RELID", + "label": "Relationship Identifier", + "dataType": "string", + "length": 200, + "keySequence": 6 + } + ], + "rows": [ + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDS"], + ["CDISCPILOT01", "DS", "", "DSLNKID", "", "ONE", "AEDS"], + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDD"], + ["CDISCPILOT01", "DD", "", "DDLNKID", "", "ONE", "AEDD"], + ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEFA"], + ["CDISCPILOT01", "FA", "", "FALNKGRP", "", "MANY", "AEFA"] + ] +} diff --git a/tests/resources/CoreIssue1248/sample.yml b/tests/resources/CoreIssue1248/sample.yml new file mode 100644 index 000000000..529298774 --- /dev/null +++ b/tests/resources/CoreIssue1248/sample.yml @@ -0,0 +1,33 @@ +custom_id: SD1129 +Authorities: + - Organization: TEST + Standards: + - Name: SDTMIG + Version: "3.2" + Category: SD1129 +Check: + all: + - name: RELTYPE + operator: non_empty +Operations: + - id: $test + operator: define_variable_metadata + name: RELID + attribute_name: define_variable_label +Core: + Id: SD1129 + Status: Published + Version: 1 +Description: TEST +Executability: Fully Executable +Outcome: + Message: TEST +Rule Type: Record Data +Scope: + Classes: + Include: + - RELATIONSHIP + Domains: + Include: + - RELREC +Sensitivity: Record