From afa3e9be7e3dfad2d5e832311ea816859f5130e2 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 23 Jan 2026 09:49:41 -0500 Subject: [PATCH 1/6] progress --- .../services/data_services/base_data_service.py | 4 ++-- cdisc_rules_engine/utilities/sdtm_utilities.py | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cdisc_rules_engine/services/data_services/base_data_service.py b/cdisc_rules_engine/services/data_services/base_data_service.py index 6090e2e80..b8a0a824c 100644 --- a/cdisc_rules_engine/services/data_services/base_data_service.py +++ b/cdisc_rules_engine/services/data_services/base_data_service.py @@ -183,7 +183,7 @@ def get_dataset_class( name = class_data.get("name") if name: return convert_library_class_name_to_ct_class(name) - return self._handle_special_cases( + return self._handle_custom_domains( dataset, dataset_metadata, file_path, datasets ) @@ -229,7 +229,7 @@ def get_dataset_metadata( } return self.dataset_implementation.from_dict(metadata_to_return) - def _handle_special_cases( + def _handle_custom_domains( self, dataset: DatasetInterface, dataset_metadata: SDTMDatasetMetadata, diff --git a/cdisc_rules_engine/utilities/sdtm_utilities.py b/cdisc_rules_engine/utilities/sdtm_utilities.py index 20c8d9718..96b10d2ab 100644 --- a/cdisc_rules_engine/utilities/sdtm_utilities.py +++ b/cdisc_rules_engine/utilities/sdtm_utilities.py @@ -75,9 +75,12 @@ def get_variables_metadata_from_standard(domain, library_metadata): # noqa model_details = library_metadata.model_metadata is_custom = domain not in standard_details.get("domains", {}) variables_metadata = [] - IG_class_details, IG_domain_details = get_class_and_domain_metadata( - standard_details, domain - ) + if not is_custom: + IG_class_details, IG_domain_details = get_class_and_domain_metadata( + standard_details, domain + ) + else: + return class_name = convert_library_class_name_to_ct_class(IG_class_details.get("name")) model_class_details = get_class_metadata(model_details, class_name) # Both custom and standard General Observations pull from model From 6f0d72e9025809b5e3f68d3d8edfd57b781701f4 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 29 Jan 2026 14:04:00 -0500 Subject: [PATCH 2/6] initial push --- .../dataset_builders/base_dataset_builder.py | 5 +++ .../operations/base_operation.py | 14 ++++++- .../utilities/sdtm_utilities.py | 39 ++++++++++++++----- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py index 1e1fcd483..f21620a5c 100644 --- a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py @@ -202,6 +202,11 @@ def get_library_variables_metadata(self) -> DatasetInterface: variables: List[dict] = sdtm_utilities.get_variables_metadata_from_standard( domain=self.dataset_metadata.unsplit_name, library_metadata=self.library_metadata, + data_service=self.data_service, + dataset=self.get_dataset_contents(), + datasets=self.datasets, + dataset_metadata=self.dataset_metadata, + dataset_path=self.dataset_path, ) variables_metadata: dict = self.library_metadata.variables_metadata.get( domain, {} diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py index 53dc15bcb..2f35254a3 100644 --- a/cdisc_rules_engine/operations/base_operation.py +++ b/cdisc_rules_engine/operations/base_operation.py @@ -228,8 +228,15 @@ def _get_variables_metadata_from_standard(self) -> List[dict]: # self.params.domain is unsplit_name domain_for_library = self.params.domain return sdtm_utilities.get_variables_metadata_from_standard( - domain_for_library, - self.library_metadata, + domain=domain_for_library, + library_metadata=self.library_metadata, + data_service=self.data_service, + dataset=self.evaluation_dataset, + datatset_metadata=self.data_service.get_raw_dataset_metadata( + dataset_name=self.params.dataset_path, datasets=self.params.datasets + ), + datasets=self.params.datasets, + dataset_path=self.params.dataset_path, ) def get_allowed_variable_permissibility(self, variable_metadata: dict): @@ -286,6 +293,9 @@ def _get_variables_metadata_from_standard_model( dataset_path=self.params.dataset_path, data_service=self.data_service, library_metadata=self.library_metadata, + dataset_metadata=self.data_service.get_raw_dataset_metadata( + dataset_name=self.params.dataset_path, datasets=self.params.datasets + ), ) @staticmethod diff --git a/cdisc_rules_engine/utilities/sdtm_utilities.py b/cdisc_rules_engine/utilities/sdtm_utilities.py index 96b10d2ab..0eac84098 100644 --- a/cdisc_rules_engine/utilities/sdtm_utilities.py +++ b/cdisc_rules_engine/utilities/sdtm_utilities.py @@ -16,6 +16,7 @@ ) import copy from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata +from cdisc_rules_engine.models.dataset.dataset_interface import DatasetInterface from typing import Iterable, Tuple, List, Optional @@ -53,7 +54,15 @@ def get_tabulation_model_type_and_version(model_link: dict) -> Tuple: return model_type, model_version -def get_variables_metadata_from_standard(domain, library_metadata): # noqa +def get_variables_metadata_from_standard( # noqa + domain, + library_metadata, + data_service, + dataset: DatasetInterface, + dataset_metadata: SDTMDatasetMetadata, + dataset_path: str, + datasets: Iterable[SDTMDatasetMetadata], +): add_AP = False original_domain = domain if ( @@ -70,7 +79,6 @@ def get_variables_metadata_from_standard(domain, library_metadata): # noqa domain = domain[2:] original_domain = domain add_AP = True - standard_details = library_metadata.standard_metadata model_details = library_metadata.model_metadata is_custom = domain not in standard_details.get("domains", {}) @@ -79,9 +87,13 @@ def get_variables_metadata_from_standard(domain, library_metadata): # noqa IG_class_details, IG_domain_details = get_class_and_domain_metadata( standard_details, domain ) + class_name = convert_library_class_name_to_ct_class( + IG_class_details.get("name") + ) else: - return - class_name = convert_library_class_name_to_ct_class(IG_class_details.get("name")) + class_name = data_service._handle_custom_domains( + dataset, dataset_metadata, dataset_path, datasets + ) model_class_details = get_class_metadata(model_details, class_name) # Both custom and standard General Observations pull from model if is_custom or class_name in DETECTABLE_CLASSES: @@ -285,11 +297,13 @@ def get_variables_metadata_from_standard_model( # noqa dataset_path: str, data_service: DataServiceInterface, library_metadata: LibraryMetadataContainer, + dataset_metadata: SDTMDatasetMetadata, ) -> List[dict]: """ gets class via the IG then uses the class to get the variables via the model classes outside of general observation, we check the model for their definition if they are not there, differ to the standard definition of the domain + if custom, IDs class and uses class variables. """ add_AP = False original_domain = domain @@ -309,11 +323,18 @@ def get_variables_metadata_from_standard_model( # noqa add_AP = True standard_details = library_metadata.standard_metadata model_details = library_metadata.model_metadata - - IG_class_details, IG_domain_details = get_class_and_domain_metadata( - standard_details, domain - ) - class_name = convert_library_class_name_to_ct_class(IG_class_details.get("name")) + is_custom = domain not in standard_details.get("domains", {}) + if not is_custom: + IG_class_details, IG_domain_details = get_class_and_domain_metadata( + standard_details, domain + ) + class_name = convert_library_class_name_to_ct_class( + IG_class_details.get("name") + ) + else: + class_name = data_service._handle_custom_domains( + dataframe, dataset_metadata, dataset_path, datasets + ) if class_name in DETECTABLE_CLASSES: model_class_details = get_class_metadata(model_details, class_name) ( From 0bf011af6bb3999afdad7c1d6032287a16bc5911 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 29 Jan 2026 14:29:20 -0500 Subject: [PATCH 3/6] spelling --- cdisc_rules_engine/operations/base_operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py index 2f35254a3..e8913d011 100644 --- a/cdisc_rules_engine/operations/base_operation.py +++ b/cdisc_rules_engine/operations/base_operation.py @@ -232,7 +232,7 @@ def _get_variables_metadata_from_standard(self) -> List[dict]: library_metadata=self.library_metadata, data_service=self.data_service, dataset=self.evaluation_dataset, - datatset_metadata=self.data_service.get_raw_dataset_metadata( + dataset_metadata=self.data_service.get_raw_dataset_metadata( dataset_name=self.params.dataset_path, datasets=self.params.datasets ), datasets=self.params.datasets, From 4fe49954ed96c928b121f03f1887774baaf1575e Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 29 Jan 2026 16:05:49 -0500 Subject: [PATCH 4/6] tests --- .../test_get_dataset_filtered_variables.py | 32 +++- .../test_get_model_filtered_variables.py | 2 + .../test_library_model_column_order.py | 32 +++- tests/unit/test_utilities/test_sdtm_utils.py | 178 ++++++++++++++++-- 4 files changed, 215 insertions(+), 29 deletions(-) diff --git a/tests/unit/test_operations/test_get_dataset_filtered_variables.py b/tests/unit/test_operations/test_get_dataset_filtered_variables.py index 5ec81ad25..30c080619 100644 --- a/tests/unit/test_operations/test_get_dataset_filtered_variables.py +++ b/tests/unit/test_operations/test_get_dataset_filtered_variables.py @@ -96,6 +96,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -130,7 +131,7 @@ "VISITNUM": [1, 2, 1], "VISIT": ["Day 1", "Day 7", "Day 1"], }, - {"name": "AE"}, + {"name": "AE", "first_record": {"DOMAIN": "AE"}}, "role", "Timing", ["VISITNUM", "VISIT"], @@ -188,6 +189,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -210,7 +212,7 @@ "USUBJID": ["SUBJ001", "SUBJ002", "SUBJ003"], "AETERM": ["Headache", "Nausea", "Fatigue"], }, - {"name": "AE"}, + {"name": "AE", "first_record": {"DOMAIN": "AE"}}, "role", "Identifier", ["STUDYID", "DOMAIN", "USUBJID", "AETERM"], @@ -273,6 +275,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -298,7 +301,7 @@ "AETERM": ["Headache", "Nausea"], "AESEQ": [1, 2], }, - {"name": "AE"}, + {"name": "AE", "first_record": {"DOMAIN": "AE"}}, "role", "Identifier", ["STUDYID", "AESEQ"], @@ -349,6 +352,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -370,7 +374,7 @@ "AETERM": ["Headache", "Nausea"], "AESEQ": [1, 2], }, - {"name": "AE"}, + {"name": "AE", "first_record": {"DOMAIN": "AE"}}, "role", "Timing", [], @@ -438,6 +442,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"FA"}, "classes": [ { "name": FINDINGS_ABOUT, @@ -465,10 +470,10 @@ "FASEQ": [1, 2], "USUBJID": ["SUBJ001", "SUBJ002"], }, - {"name": "FA"}, + {"name": "FA", "first_record": {"DOMAIN": "FA"}}, "role", "Identifier", - ["STUDYID", "DOMAIN", "USUBJID"], + ["STUDYID", "DOMAIN", "FASEQ", "USUBJID"], ) @@ -638,6 +643,7 @@ def test_get_dataset_filtered_variables_dask( standard_metadata = { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -752,6 +758,7 @@ def test_get_dataset_filtered_variables_empty_dataset( standard_metadata = { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -759,7 +766,11 @@ def test_get_dataset_filtered_variables_empty_dataset( { "name": "AE", "datasetVariables": [ - {"name": "VISITNUM", "role": VariableRoles.TIMING.value}, + { + "name": "VISITNUM", + "role": VariableRoles.TIMING.value, + "ordinal": 1, + }, ], } ], @@ -859,6 +870,7 @@ def test_get_dataset_filtered_variables_invalid_key(operation_params: OperationP standard_metadata = { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -866,7 +878,11 @@ def test_get_dataset_filtered_variables_invalid_key(operation_params: OperationP { "name": "AE", "datasetVariables": [ - {"name": "AETERM", "role": VariableRoles.IDENTIFIER.value}, + { + "name": "AETERM", + "role": VariableRoles.IDENTIFIER.value, + "ordinal": 1, + }, ], } ], diff --git a/tests/unit/test_operations/test_get_model_filtered_variables.py b/tests/unit/test_operations/test_get_model_filtered_variables.py index b7ed33aa4..cd0553455 100644 --- a/tests/unit/test_operations/test_get_model_filtered_variables.py +++ b/tests/unit/test_operations/test_get_model_filtered_variables.py @@ -108,6 +108,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -235,6 +236,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", diff --git a/tests/unit/test_operations/test_library_model_column_order.py b/tests/unit/test_operations/test_library_model_column_order.py index 92aef31d3..087eac3db 100644 --- a/tests/unit/test_operations/test_library_model_column_order.py +++ b/tests/unit/test_operations/test_library_model_column_order.py @@ -76,6 +76,7 @@ standard_metadata = { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -117,16 +118,31 @@ def test_get_column_order_from_library(operation_params: OperationParams, datase operation_params.domain = "AE" operation_params.standard = "sdtmig" operation_params.standard_version = "3-4" + operation_params.datasets = [ + SDTMDatasetMetadata(name="AE", first_record={"DOMAIN": "AE"}) + ] # save model metadata to cache cache = InMemoryCacheService.get_instance() library_metadata = LibraryMetadataContainer( - standard_metadata=standard_metadata, model_metadata=model_metadata + standard_metadata=standard_metadata, # USE updated version + model_metadata=model_metadata, ) # execute operation - data_service = LocalDataService.get_instance( - cache_service=cache, config=ConfigService() + data_service = LocalDataService( + cache_service=cache, + config=ConfigService(), + reader_factory=DataReaderFactory(), + standard="sdtmig", + standard_version="3-4", + library_metadata=library_metadata, ) + + def mock_get_raw_metadata(*args, **kwargs): + return SDTMDatasetMetadata(name="AE", first_record={"DOMAIN": "AE"}) + + data_service.get_raw_dataset_metadata = mock_get_raw_metadata + operation = LibraryModelColumnOrder( operation_params, operation_params.dataframe, @@ -215,6 +231,7 @@ def test_get_column_order_from_library(operation_params: OperationParams, datase }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": FINDINGS_ABOUT, @@ -260,7 +277,9 @@ def test_get_findings_class_column_order_from_library( operation_params.domain = "AE" operation_params.standard = "sdtmig" operation_params.standard_version = "3-4" - operation_params.datasets = [SDTMDatasetMetadata(name="AE")] + operation_params.datasets = [ + SDTMDatasetMetadata(name="AE", first_record={"DOMAIN": "AE"}) + ] # save model metadata to cache cache = InMemoryCacheService.get_instance() @@ -276,6 +295,11 @@ def test_get_findings_class_column_order_from_library( standard_version="3-4", library_metadata=library_metadata, ) + + def mock_get_raw_metadata(*args, **kwargs): + return SDTMDatasetMetadata(name="AE", first_record={"DOMAIN": "AE"}) + + data_service.get_raw_dataset_metadata = mock_get_raw_metadata operation = LibraryModelColumnOrder( operation_params, operation_params.dataframe, diff --git a/tests/unit/test_utilities/test_sdtm_utils.py b/tests/unit/test_utilities/test_sdtm_utils.py index eab41855c..c937db5b9 100644 --- a/tests/unit/test_utilities/test_sdtm_utils.py +++ b/tests/unit/test_utilities/test_sdtm_utils.py @@ -8,6 +8,7 @@ get_variables_metadata_from_standard, get_variables_metadata_from_standard_model, ) +from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata @pytest.fixture @@ -26,7 +27,9 @@ def library_metadata(): @pytest.fixture def mock_data_service(): """Mock data service for tests that require it.""" - return Mock() + mock_service = Mock() + mock_service._handle_custom_domains = Mock(return_value=None) + return mock_service @pytest.fixture @@ -35,44 +38,110 @@ def mock_datasets(): return [] -def test_standard_domain_ae(library_metadata): - variables = get_variables_metadata_from_standard("AE", library_metadata) +@pytest.fixture +def mock_dataset(): + """Mock dataset for tests.""" + return Mock() + + +def test_standard_domain_ae( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): + dataset_metadata = SDTMDatasetMetadata(name="AE", first_record={"DOMAIN": "AE"}) + variables = get_variables_metadata_from_standard( + "AE", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/ae.xpt", + mock_datasets, + ) assert any(var["name"] == "STUDYID" for var in variables) assert any(var["name"] == "AETERM" for var in variables) assert any(var["name"] == "AESTDTC" for var in variables) -def test_standard_domain_dm(library_metadata): - variables = get_variables_metadata_from_standard("DM", library_metadata) +def test_standard_domain_dm( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): + dataset_metadata = SDTMDatasetMetadata(name="DM", first_record={"DOMAIN": "DM"}) + variables = get_variables_metadata_from_standard( + "DM", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/dm.xpt", + mock_datasets, + ) assert any(var["name"] == "USUBJID" for var in variables) assert any(var["name"] == "AGE" for var in variables) assert any(var["name"] == "SEX" for var in variables) -def test_findings_domain_lb(library_metadata): - variables = get_variables_metadata_from_standard("LB", library_metadata) +def test_findings_domain_lb( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): + dataset_metadata = SDTMDatasetMetadata(name="LB", first_record={"DOMAIN": "LB"}) + variables = get_variables_metadata_from_standard( + "LB", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/lb.xpt", + mock_datasets, + ) assert any(var["name"] == "STUDYID" for var in variables) assert any(var["name"] == "USUBJID" for var in variables) assert any(var["name"] == "LBTEST" for var in variables) assert any(var["name"] == "LBORRES" for var in variables) -def test_supp_domain(library_metadata): - variables = get_variables_metadata_from_standard("SUPPAE", library_metadata) +def test_supp_domain(library_metadata, mock_data_service, mock_dataset, mock_datasets): + dataset_metadata = SDTMDatasetMetadata(name="SUPPAE", first_record={"QNAM": "TEST"}) + variables = get_variables_metadata_from_standard( + "SUPPAE", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/suppae.xpt", + mock_datasets, + ) assert any(var["name"] == "STUDYID" for var in variables) assert any(var["name"] == "QNAM" for var in variables) assert any(var["name"] == "QLABEL" for var in variables) -def test_sq_domain(library_metadata): - variables = get_variables_metadata_from_standard("SQAE", library_metadata) +def test_sq_domain(library_metadata, mock_data_service, mock_dataset, mock_datasets): + dataset_metadata = SDTMDatasetMetadata(name="SQAE", first_record={"QNAM": "TEST"}) + variables = get_variables_metadata_from_standard( + "SQAE", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/sqae.xpt", + mock_datasets, + ) assert any(var["name"] == "STUDYID" for var in variables) assert any(var["name"] == "QNAM" for var in variables) assert any(var["name"] == "QLABEL" for var in variables) -def test_ap_domain(library_metadata): - variables = get_variables_metadata_from_standard("APDM", library_metadata) +def test_ap_domain(library_metadata, mock_data_service, mock_dataset, mock_datasets): + dataset_metadata = SDTMDatasetMetadata(name="APDM", first_record={"APID": "001"}) + variables = get_variables_metadata_from_standard( + "APDM", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/apdm.xpt", + mock_datasets, + ) assert any(var["name"] == "APID" for var in variables) assert not any(var["name"] == "USUBJID" for var in variables) assert any(var["name"] == "RSUBJID" for var in variables) @@ -80,16 +149,36 @@ def test_ap_domain(library_metadata): assert any(var["name"] == "DMDY" for var in variables) -def test_sqap_domain(library_metadata): - variables = get_variables_metadata_from_standard("SQAP", library_metadata) +def test_sqap_domain(library_metadata, mock_data_service, mock_dataset, mock_datasets): + dataset_metadata = SDTMDatasetMetadata(name="SQAP", first_record={"QNAM": "TEST"}) + variables = get_variables_metadata_from_standard( + "SQAP", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/sqap.xpt", + mock_datasets, + ) assert any(var["name"] == "APID" for var in variables) assert not any(var["name"] == "USUBJID" for var in variables) assert any(var["name"] == "RDOMAIN" for var in variables) -def test_findings_about_domain_fa(library_metadata): +def test_findings_about_domain_fa( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): """Test Findings About domain includes FINDINGS class variables.""" - variables = get_variables_metadata_from_standard("FA", library_metadata) + dataset_metadata = SDTMDatasetMetadata(name="FA", first_record={"DOMAIN": "FA"}) + variables = get_variables_metadata_from_standard( + "FA", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/fa.xpt", + mock_datasets, + ) assert any(var["name"] == "FATEST" for var in variables) assert any(var["name"] == "FAOBJ" for var in variables) @@ -97,6 +186,7 @@ def test_findings_about_domain_fa(library_metadata): # Tests for get_variables_metadata_from_standard_model def test_findings_domain_from_model(library_metadata, mock_data_service, mock_datasets): mock_dataframe = Mock() + dataset_metadata = SDTMDatasetMetadata(name="LB", first_record={"DOMAIN": "LB"}) variables = get_variables_metadata_from_standard_model( domain="LB", dataframe=mock_dataframe, @@ -104,6 +194,7 @@ def test_findings_domain_from_model(library_metadata, mock_data_service, mock_da dataset_path="/path/to/lb.xpt", data_service=mock_data_service, library_metadata=library_metadata, + dataset_metadata=dataset_metadata, ) assert any(var["name"] == "STUDYID" for var in variables) assert any(var["name"] == "LBTEST" for var in variables) @@ -112,6 +203,7 @@ def test_findings_domain_from_model(library_metadata, mock_data_service, mock_da def test_supp_domain_from_model(library_metadata, mock_data_service, mock_datasets): """Test retrieving variables for SUPP domain from model.""" mock_dataframe = Mock() + dataset_metadata = SDTMDatasetMetadata(name="SUPPAE", first_record={"QNAM": "TEST"}) variables = get_variables_metadata_from_standard_model( domain="SUPPAE", dataframe=mock_dataframe, @@ -119,6 +211,7 @@ def test_supp_domain_from_model(library_metadata, mock_data_service, mock_datase dataset_path="/path/to/suppae.xpt", data_service=mock_data_service, library_metadata=library_metadata, + dataset_metadata=dataset_metadata, ) assert any(var["name"] == "RDOMAIN" for var in variables) assert any(var["name"] == "IDVAR" for var in variables) @@ -127,6 +220,7 @@ def test_supp_domain_from_model(library_metadata, mock_data_service, mock_datase def test_sqap_domain_from_model(library_metadata, mock_data_service, mock_datasets): """Test retrieving variables for SUPP domain from model.""" mock_dataframe = Mock() + dataset_metadata = SDTMDatasetMetadata(name="SQAP", first_record={"QNAM": "TEST"}) variables = get_variables_metadata_from_standard_model( domain="SQAP", dataframe=mock_dataframe, @@ -134,6 +228,7 @@ def test_sqap_domain_from_model(library_metadata, mock_data_service, mock_datase dataset_path="/path/to/suppae.xpt", data_service=mock_data_service, library_metadata=library_metadata, + dataset_metadata=dataset_metadata, ) assert any(var["name"] == "RDOMAIN" for var in variables) assert any(var["name"] == "APID" for var in variables) @@ -142,6 +237,7 @@ def test_sqap_domain_from_model(library_metadata, mock_data_service, mock_datase def test_ap_domain_from_model(library_metadata, mock_data_service, mock_datasets): """Test AP domain excludes USUBJID and includes APID.""" mock_dataframe = Mock() + dataset_metadata = SDTMDatasetMetadata(name="APDM", first_record={"APID": "001"}) variables = get_variables_metadata_from_standard_model( domain="APDM", dataframe=mock_dataframe, @@ -149,8 +245,56 @@ def test_ap_domain_from_model(library_metadata, mock_data_service, mock_datasets dataset_path="/path/to/apdm.xpt", data_service=mock_data_service, library_metadata=library_metadata, + dataset_metadata=dataset_metadata, ) assert not any(var["name"] == "USUBJID" for var in variables) assert any(var["name"] == "APID" for var in variables) assert any(var["name"] == "AGE" for var in variables) assert any(var["name"] == "DMDY" for var in variables) + + +def test_custom_domain_events_class( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): + """Test custom domain detection and variable metadata retrieval for EVENTS class.""" + dataset_metadata = SDTMDatasetMetadata(name="ZZ", first_record={"DOMAIN": "ZZ"}) + mock_data_service._handle_custom_domains = Mock(return_value="EVENTS") + mock_dataset.columns = ["STUDYID", "DOMAIN", "USUBJID", "ZZTERM", "ZZSEQ"] + variables = get_variables_metadata_from_standard( + "ZZ", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/zz.xpt", + mock_datasets, + ) + mock_data_service._handle_custom_domains.assert_called_once() + assert any(var["name"] == "STUDYID" for var in variables) + assert any(var["name"] == "DOMAIN" for var in variables) + assert any(var["name"] == "ZZTERM" for var in variables) + assert any(var["name"] == "ZZSEQ" for var in variables) + + +def test_custom_domain_findings_class( + library_metadata, mock_data_service, mock_dataset, mock_datasets +): + """Test custom domain detection and variable metadata retrieval for FINDINGS class.""" + dataset_metadata = SDTMDatasetMetadata(name="XX", first_record={"DOMAIN": "XX"}) + mock_data_service._handle_custom_domains = Mock(return_value="FINDINGS") + mock_dataset.columns = ["STUDYID", "DOMAIN", "USUBJID", "XXTESTCD", "XXORRES"] + variables = get_variables_metadata_from_standard( + "XX", + library_metadata, + mock_data_service, + mock_dataset, + dataset_metadata, + "/path/to/xx.xpt", + mock_datasets, + ) + mock_data_service._handle_custom_domains.assert_called_once() + assert any(var["name"] == "STUDYID" for var in variables) + assert any(var["name"] == "DOMAIN" for var in variables) + assert any(var["name"] == "USUBJID" for var in variables) + assert any(var["name"] == "XXTESTCD" for var in variables) + assert any(var["name"] == "XXORRES" for var in variables) From 6186c24ed757f7b6eb81711eb2a5d735263842ce Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 29 Jan 2026 16:24:05 -0500 Subject: [PATCH 5/6] last test --- .../test_parent_library_model_column_order.py | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_operations/test_parent_library_model_column_order.py b/tests/unit/test_operations/test_parent_library_model_column_order.py index f07a2719e..6cef24791 100644 --- a/tests/unit/test_operations/test_parent_library_model_column_order.py +++ b/tests/unit/test_operations/test_parent_library_model_column_order.py @@ -89,6 +89,7 @@ }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE"}, "classes": [ { "name": "Events", @@ -118,7 +119,7 @@ def test_get_parent_column_order_from_library( SDTMDatasetMetadata( first_record={"DOMAIN": "AE"}, filename="ae.xpt", - full_path="ae.xpt", # Added full_path + full_path="ae.xpt", ) ] ae = PandasDataset.from_dict( @@ -145,13 +146,29 @@ def test_get_parent_column_order_from_library( library_metadata = LibraryMetadataContainer( standard_metadata=standard_metadata, model_metadata=model_metadata ) - # execute operation - data_service = LocalDataService.get_instance( - cache_service=cache, config=ConfigService() + + data_service = LocalDataService( + cache_service=cache, + config=ConfigService(), + reader_factory=DataReaderFactory(), + standard="sdtmig", + standard_version="3-4", + library_metadata=library_metadata, ) + + def mock_get_raw_metadata(dataset_name, **kwargs): + if "ae" in dataset_name.lower(): + return SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, + filename="ae.xpt", + full_path="ae.xpt", + ) + return SDTMDatasetMetadata(name="UNKNOWN") + + data_service.get_raw_dataset_metadata = mock_get_raw_metadata operation = ParentLibraryModelColumnOrder( operation_params, - data, # Pass data as evaluation_dataset parameter + data, cache, data_service, library_metadata, @@ -254,6 +271,7 @@ def test_get_parent_column_order_from_library( }, { "_links": {"model": {"href": "/mdr/sdtm/1-5"}}, + "domains": {"AE", "EC"}, "classes": [ { "name": FINDINGS_ABOUT, @@ -348,6 +366,24 @@ def test_get_parent_findings_class_column_order_from_library( standard_version="3-4", library_metadata=library_metadata, ) + + def mock_get_raw_metadata(dataset_name, **kwargs): + if "ae" in dataset_name.lower(): + return SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, + filename="ae.xpt", + full_path="ae.xpt", + ) + elif "ec" in dataset_name.lower(): + return SDTMDatasetMetadata( + first_record={"DOMAIN": "EC"}, + filename="ec.xpt", + full_path="ec.xpt", + ) + return SDTMDatasetMetadata(name="UNKNOWN") + + data_service.get_raw_dataset_metadata = mock_get_raw_metadata + operation = ParentLibraryModelColumnOrder( operation_params, data, From a07be58419d64024867e42701f688f043d0c5293 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 12 Feb 2026 12:11:28 -0500 Subject: [PATCH 6/6] update to give raw original dataset to handle custom domains --- cdisc_rules_engine/utilities/sdtm_utilities.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cdisc_rules_engine/utilities/sdtm_utilities.py b/cdisc_rules_engine/utilities/sdtm_utilities.py index 0eac84098..3d7304d8d 100644 --- a/cdisc_rules_engine/utilities/sdtm_utilities.py +++ b/cdisc_rules_engine/utilities/sdtm_utilities.py @@ -92,7 +92,10 @@ def get_variables_metadata_from_standard( # noqa ) else: class_name = data_service._handle_custom_domains( - dataset, dataset_metadata, dataset_path, datasets + data_service.get_dataset(dataset_name=dataset_metadata.full_path), + dataset_metadata, + dataset_path, + datasets, ) model_class_details = get_class_metadata(model_details, class_name) # Both custom and standard General Observations pull from model