diff --git a/cdisc_rules_engine/models/operation_params.py b/cdisc_rules_engine/models/operation_params.py
index 7174f09e9..3cc8b938a 100644
--- a/cdisc_rules_engine/models/operation_params.py
+++ b/cdisc_rules_engine/models/operation_params.py
@@ -62,3 +62,4 @@ class OperationParams:
value_is_reference: bool = False
namespace: str = None
delimiter: str = None
+ define_xml_path: str = None
diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py
index a045a323b..80c088a36 100644
--- a/cdisc_rules_engine/operations/base_operation.py
+++ b/cdisc_rules_engine/operations/base_operation.py
@@ -1,3 +1,6 @@
+import os
+
+from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME
from cdisc_rules_engine.models.operation_params import OperationParams
from cdisc_rules_engine.constants.permissibility import (
PERMISSIBLE,
@@ -314,3 +317,16 @@ def _resolve_variable_name(variable_name, domain: str):
if "--" in variable_name
else variable_name
)
+
+ def _get_define_contents(self):
+ define_path = (
+ self.params.define_xml_path
+ if self.params.define_xml_path
+ else os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME)
+ )
+ if not os.path.exists(define_path):
+ raise FileNotFoundError(f"Define XML file {define_path} not found")
+ define_contents = self.data_service.get_define_xml_contents(
+ dataset_name=define_path
+ )
+ return define_contents
diff --git a/cdisc_rules_engine/operations/define_dictionary_version_validator.py b/cdisc_rules_engine/operations/define_dictionary_version_validator.py
index f1db3e4a2..2b4190896 100644
--- a/cdisc_rules_engine/operations/define_dictionary_version_validator.py
+++ b/cdisc_rules_engine/operations/define_dictionary_version_validator.py
@@ -1,4 +1,3 @@
-from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME
from cdisc_rules_engine.models.external_dictionaries_container import (
DICTIONARY_VALIDATORS,
DictionaryTypes,
@@ -8,7 +7,6 @@
)
from .base_operation import BaseOperation
from cdisc_rules_engine.exceptions.custom_exceptions import UnsupportedDictionaryType
-import os
class DefineDictionaryVersionValidator(BaseOperation):
@@ -36,9 +34,7 @@ def _execute_operation(self) -> bool:
whodrug_path=self.params.whodrug_path,
loinc_path=self.params.loinc_path,
)
- define_contents = self.data_service.get_define_xml_contents(
- dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME)
- )
+ define_contents = self._get_define_contents()
define_reader = DefineXMLReaderFactory.from_file_contents(define_contents)
define_dictionary_version = define_reader.get_external_dictionary_version(
self.params.external_dictionary_type
diff --git a/cdisc_rules_engine/operations/define_variable_metadata.py b/cdisc_rules_engine/operations/define_variable_metadata.py
index ab5e73eea..c6312cb82 100644
--- a/cdisc_rules_engine/operations/define_variable_metadata.py
+++ b/cdisc_rules_engine/operations/define_variable_metadata.py
@@ -1,9 +1,7 @@
-from cdisc_rules_engine.constants.define_xml_constants import DEFINE_XML_FILE_NAME
from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import (
DefineXMLReaderFactory,
)
from .base_operation import BaseOperation
-import os
class DefineVariableMetadata(BaseOperation):
@@ -32,9 +30,7 @@ def _execute_operation(self):
...
}
"""
- define_contents = self.data_service.get_define_xml_contents(
- dataset_name=os.path.join(self.params.directory_path, DEFINE_XML_FILE_NAME)
- )
+ define_contents = self._get_define_contents()
define_reader = DefineXMLReaderFactory.from_file_contents(define_contents)
variables_metadata = define_reader.extract_variables_metadata(
self.params.domain
diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py
index 2d69de749..6a469f28a 100644
--- a/cdisc_rules_engine/rules_engine.py
+++ b/cdisc_rules_engine/rules_engine.py
@@ -399,6 +399,7 @@ def execute_rule(
standard_substandard=self.standard_substandard,
external_dictionaries=self.external_dictionaries,
ct_packages=ct_packages,
+ define_xml_path=self.define_xml_path,
)
dataset_variable = DatasetVariable(
dataset,
diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py
index 407f67cb7..316cab635 100644
--- a/cdisc_rules_engine/utilities/rule_processor.py
+++ b/cdisc_rules_engine/utilities/rule_processor.py
@@ -234,11 +234,12 @@ def rule_applies_to_class(
excluded_classes = classes.get("Exclude", [])
is_included = True
is_excluded = False
+ dataset_name = dataset_metadata.full_path
if included_classes:
if ALL_KEYWORD in included_classes:
return True
variables = self.data_service.get_variables_metadata(
- dataset_name=dataset_metadata.full_path, datasets=datasets
+ dataset_name=dataset_name, datasets=datasets
).data.variable_name
class_name = self.data_service.get_dataset_class(
variables,
@@ -252,7 +253,7 @@ def rule_applies_to_class(
is_included = False
if excluded_classes:
variables = self.data_service.get_variables_metadata(
- dataset_name=dataset_metadata.full_path, datasets=datasets
+ dataset_name=dataset_name, datasets=datasets
).data.variable_name
class_name = self.data_service.get_dataset_class(
variables,
@@ -367,6 +368,7 @@ def perform_rule_operations(
for ct_package_type in operation.get("ct_package_types", [])
],
ct_version=operation.get("version"),
+ define_xml_path=kwargs.get("define_xml_path"),
dataframe=dataset_copy,
dataset_path=dataset_path,
datasets=datasets,
diff --git a/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py
new file mode 100644
index 000000000..b5b60572d
--- /dev/null
+++ b/tests/QARegressionTests/test_Issues/test_CoreIssue1248.py
@@ -0,0 +1,186 @@
+import os
+import subprocess
+
+import pytest
+import json
+from conftest import get_python_executable
+
+
+@pytest.mark.regression
+class TestCoreIssue1248:
+ @pytest.mark.parametrize(
+ "command,rules_report,num_issues",
+ [
+ # define path provided will lead to successful execution
+ (
+ [
+ f"{get_python_executable()}",
+ "-m",
+ "core",
+ "validate",
+ "-s",
+ "sdtmig",
+ "-v",
+ "3-2",
+ "-of",
+ "JSON",
+ "-lr",
+ os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"),
+ "-cs",
+ "-dxp",
+ os.path.join(
+ "tests",
+ "resources",
+ "CoreIssue1248",
+ "define_subfolder",
+ "define.xml",
+ ),
+ "-ps",
+ "1",
+ "-dp",
+ os.path.join("tests", "resources", "CoreIssue1248", "data.xlsx"),
+ ],
+ [
+ {
+ "core_id": "SD1129",
+ "version": "1",
+ "cdisc_rule_id": "",
+ "fda_rule_id": "",
+ "message": "TEST",
+ "status": "ISSUE REPORTED",
+ }
+ ],
+ 2,
+ ),
+ # JSON data file and no define.xml in same folder and no -dxp param will provide error
+ (
+ [
+ f"{get_python_executable()}",
+ "-m",
+ "core",
+ "validate",
+ "-s",
+ "sdtmig",
+ "-v",
+ "3-2",
+ "-of",
+ "JSON",
+ "-lr",
+ os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"),
+ "-cs",
+ "-dp",
+ os.path.join("tests", "resources", "CoreIssue1248", "relrec.json"),
+ "-ps",
+ "1",
+ ],
+ [
+ {
+ "core_id": "SD1129",
+ "version": "1",
+ "cdisc_rule_id": "",
+ "fda_rule_id": "",
+ "message": "TEST",
+ "status": "EXECUTION ERROR",
+ }
+ ],
+ 1,
+ ),
+ # no define.xml in same path as data.xlsx file will provide error
+ (
+ [
+ f"{get_python_executable()}",
+ "-m",
+ "core",
+ "validate",
+ "-s",
+ "sdtmig",
+ "-v",
+ "3-2",
+ "-of",
+ "JSON",
+ "-lr",
+ os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"),
+ "-cs",
+ "-dp",
+ os.path.join("tests", "resources", "CoreIssue1248", "data.xlsx"),
+ "-ps",
+ "1",
+ ],
+ [
+ {
+ "core_id": "SD1129",
+ "version": "1",
+ "cdisc_rule_id": "",
+ "fda_rule_id": "",
+ "message": "TEST",
+ "status": "EXECUTION ERROR",
+ }
+ ],
+ 1,
+ ),
+ # define.xml in same folder as the data.xls and no -dxp provided will provide error until
+ # in ExcelDataService dataset metadata creation full_path=dataset_name
+ (
+ [
+ f"{get_python_executable()}",
+ "-m",
+ "core",
+ "validate",
+ "-s",
+ "sdtmig",
+ "-v",
+ "3-2",
+ "-of",
+ "JSON",
+ "-lr",
+ os.path.join("tests", "resources", "CoreIssue1248", "sample.yml"),
+ "-cs",
+ "-dp",
+ os.path.join(
+ "tests",
+ "resources",
+ "CoreIssue1248",
+ "data_and_define",
+ "data.xlsx",
+ ),
+ "-ps",
+ "1",
+ ],
+ [
+ {
+ "core_id": "SD1129",
+ "version": "1",
+ "cdisc_rule_id": "",
+ "fda_rule_id": "",
+ "message": "TEST",
+ "status": "EXECUTION ERROR",
+ }
+ ],
+ 1,
+ ),
+ ],
+ )
+ def test_define_path_used(self, command, rules_report, num_issues):
+ subprocess.run(command, check=True)
+
+ # Get the latest created report file
+ files = os.listdir()
+ json_files = [
+ file
+ for file in files
+ if file.startswith("CORE-Report-") and file.endswith(".json")
+ ]
+ json_report_path = sorted(json_files)[-1]
+ # Open the JSON report file
+ json_report = json.load(open(json_report_path))
+ assert {
+ "Conformance_Details",
+ "Dataset_Details",
+ "Issue_Summary",
+ "Issue_Details",
+ "Rules_Report",
+ }.issubset(json_report.keys())
+ assert len(json_report["Issue_Details"]) == num_issues
+ assert json_report["Rules_Report"] == rules_report
+ if os.path.exists(json_report_path):
+ os.remove(json_report_path)
diff --git a/tests/resources/CoreIssue1248/data.xlsx b/tests/resources/CoreIssue1248/data.xlsx
new file mode 100644
index 000000000..c994860b4
Binary files /dev/null and b/tests/resources/CoreIssue1248/data.xlsx differ
diff --git a/tests/resources/CoreIssue1248/data_and_define/data.xlsx b/tests/resources/CoreIssue1248/data_and_define/data.xlsx
new file mode 100644
index 000000000..c994860b4
Binary files /dev/null and b/tests/resources/CoreIssue1248/data_and_define/data.xlsx differ
diff --git a/tests/resources/CoreIssue1248/data_and_define/define.xml b/tests/resources/CoreIssue1248/data_and_define/define.xml
new file mode 100644
index 000000000..50bc56b7b
--- /dev/null
+++ b/tests/resources/CoreIssue1248/data_and_define/define.xml
@@ -0,0 +1,146 @@
+
+
+
+
+ CDISCPILOT01
+ Study Data Tabulation Model Metadata Submission Guidelines Sample Study
+ CDISCPILOT01
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Related Records
+
+
+
+
+
+
+
+
+
+
+ relrec.xpt
+
+
+
+
+ Study Identifier
+
+
+
+
+
+ Related Domain Abbreviation
+
+
+
+
+
+
+ Unique Subject Identifier
+
+
+
+
+
+ Identifying Variable
+
+
+
+
+
+ Identifying Variable Value
+
+
+
+
+
+ Relationship Type
+
+
+
+
+
+
+ Relationship Identifier
+
+
+
+
+
+
+ Adverse Events
+
+
+
+
+
+ Disposition
+
+
+
+
+
+ Death Details
+
+
+
+
+
+ Findings About Events or Interventions
+
+
+
+
+
+
+
+
+ Many
+
+
+
+
+
+ One
+
+
+
+
+
+
+ Annotated CRF
+
+
+ Reviewers Guide
+
+
+
+
\ No newline at end of file
diff --git a/tests/resources/CoreIssue1248/define_subfolder/define.xml b/tests/resources/CoreIssue1248/define_subfolder/define.xml
new file mode 100644
index 000000000..50bc56b7b
--- /dev/null
+++ b/tests/resources/CoreIssue1248/define_subfolder/define.xml
@@ -0,0 +1,146 @@
+
+
+
+
+ CDISCPILOT01
+ Study Data Tabulation Model Metadata Submission Guidelines Sample Study
+ CDISCPILOT01
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Related Records
+
+
+
+
+
+
+
+
+
+
+ relrec.xpt
+
+
+
+
+ Study Identifier
+
+
+
+
+
+ Related Domain Abbreviation
+
+
+
+
+
+
+ Unique Subject Identifier
+
+
+
+
+
+ Identifying Variable
+
+
+
+
+
+ Identifying Variable Value
+
+
+
+
+
+ Relationship Type
+
+
+
+
+
+
+ Relationship Identifier
+
+
+
+
+
+
+ Adverse Events
+
+
+
+
+
+ Disposition
+
+
+
+
+
+ Death Details
+
+
+
+
+
+ Findings About Events or Interventions
+
+
+
+
+
+
+
+
+ Many
+
+
+
+
+
+ One
+
+
+
+
+
+
+ Annotated CRF
+
+
+ Reviewers Guide
+
+
+
+
\ No newline at end of file
diff --git a/tests/resources/CoreIssue1248/relrec.json b/tests/resources/CoreIssue1248/relrec.json
new file mode 100644
index 000000000..e26b186cc
--- /dev/null
+++ b/tests/resources/CoreIssue1248/relrec.json
@@ -0,0 +1,83 @@
+{
+ "datasetJSONCreationDateTime": "2024-11-11T15:09:17",
+ "datasetJSONVersion": "1.1.0",
+ "fileOID": "www.cdisc.org/StudyMSGv2/1/Define-XML_2.1.0/2024-11-11/relrec",
+ "dbLastModifiedDateTime": "2020-08-21T09:14:25",
+ "originator": "CDISC SDTM MSG Team",
+ "sourceSystem": {
+ "name": "SAS on X64_10PRO",
+ "version": "9.0401M7"
+ },
+ "studyOID": "cdisc.com/CDISCPILOT01",
+ "metaDataVersionOID": "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7",
+ "metaDataRef": "define.xml",
+ "itemGroupOID": "IG.RELREC",
+ "records": 6,
+ "name": "RELREC",
+ "label": "Related Records",
+ "columns": [
+ {
+ "itemOID": "IT.RELREC.STUDYID",
+ "name": "STUDYID",
+ "label": "Study Identifier",
+ "dataType": "string",
+ "length": 12,
+ "keySequence": 1
+ },
+ {
+ "itemOID": "IT.RELREC.RDOMAIN",
+ "name": "RDOMAIN",
+ "label": "Related Domain Abbreviation",
+ "dataType": "string",
+ "length": 6,
+ "keySequence": 2
+ },
+ {
+ "itemOID": "IT.RELREC.USUBJID",
+ "name": "USUBJID",
+ "label": "Unique Subject Identifier",
+ "dataType": "string",
+ "length": 8,
+ "keySequence": 3
+ },
+ {
+ "itemOID": "IT.RELREC.IDVAR",
+ "name": "IDVAR",
+ "label": "Identifying Variable",
+ "dataType": "string",
+ "length": 200,
+ "keySequence": 4
+ },
+ {
+ "itemOID": "IT.RELREC.IDVARVAL",
+ "name": "IDVARVAL",
+ "label": "Identifying Variable Value",
+ "dataType": "string",
+ "length": 200,
+ "keySequence": 5
+ },
+ {
+ "itemOID": "IT.RELREC.RELTYPE",
+ "name": "RELTYPE",
+ "label": "Relationship Type",
+ "dataType": "string",
+ "length": 4
+ },
+ {
+ "itemOID": "IT.RELREC.RELID",
+ "name": "RELID",
+ "label": "Relationship Identifier",
+ "dataType": "string",
+ "length": 200,
+ "keySequence": 6
+ }
+ ],
+ "rows": [
+ ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDS"],
+ ["CDISCPILOT01", "DS", "", "DSLNKID", "", "ONE", "AEDS"],
+ ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEDD"],
+ ["CDISCPILOT01", "DD", "", "DDLNKID", "", "ONE", "AEDD"],
+ ["CDISCPILOT01", "AE", "", "AELNKID", "", "ONE", "AEFA"],
+ ["CDISCPILOT01", "FA", "", "FALNKGRP", "", "MANY", "AEFA"]
+ ]
+}
diff --git a/tests/resources/CoreIssue1248/sample.yml b/tests/resources/CoreIssue1248/sample.yml
new file mode 100644
index 000000000..529298774
--- /dev/null
+++ b/tests/resources/CoreIssue1248/sample.yml
@@ -0,0 +1,33 @@
+custom_id: SD1129
+Authorities:
+ - Organization: TEST
+ Standards:
+ - Name: SDTMIG
+ Version: "3.2"
+ Category: SD1129
+Check:
+ all:
+ - name: RELTYPE
+ operator: non_empty
+Operations:
+ - id: $test
+ operator: define_variable_metadata
+ name: RELID
+ attribute_name: define_variable_label
+Core:
+ Id: SD1129
+ Status: Published
+ Version: 1
+Description: TEST
+Executability: Fully Executable
+Outcome:
+ Message: TEST
+Rule Type: Record Data
+Scope:
+ Classes:
+ Include:
+ - RELATIONSHIP
+ Domains:
+ Include:
+ - RELREC
+Sensitivity: Record