diff --git a/cdisc_rules_engine/dataset_builders/content_metadata_dataset_builder.py b/cdisc_rules_engine/dataset_builders/content_metadata_dataset_builder.py index a59d544b3..f79e05f65 100644 --- a/cdisc_rules_engine/dataset_builders/content_metadata_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/content_metadata_dataset_builder.py @@ -9,6 +9,8 @@ def build(self): dataset_location - Path to file dataset_name - Name of the dataset dataset_label - Label for the dataset + is_ap - Whether the domain is an AP domain + ap_suffix - The 2-character suffix from AP domains """ size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule) return self.data_service.get_dataset_metadata( diff --git a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py index 306a735e1..b048e3a0e 100644 --- a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py @@ -15,6 +15,8 @@ def build(self): dataset_name - Name of the dataset dataset_size - File size dataset_domain - Domain of the dataset + is_ap - Whether the domain is an AP domain + ap_suffix - The 2-character suffix from AP domains define_dataset_class - dataset class define_dataset_domain - dataset domain from define define_dataset_is_non_standard - whether a dataset is a standard diff --git a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py index d36838a5e..65b5eefb4 100644 --- a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py @@ -17,6 +17,8 @@ def build(self): dataset_name - Name of the dataset dataset_label - Label for the dataset dataset_domain - Domain of the dataset + is_ap - Whether the domain is an AP domain + ap_suffix - The 2-character suffix from AP domains Columns from Define XML: define_dataset_name - dataset name from define_xml @@ -85,6 +87,15 @@ def _get_define_xml_dataframe(self): return self.dataset_implementation(columns=define_col_order) return self.dataset_implementation.from_records(define_metadata) + def _ensure_required_columns(self, dataset_df, dataset_col_order): + if "dataset_size" not in dataset_df.columns: + dataset_df["dataset_size"] = None + if "is_ap" not in dataset_df.columns: + dataset_df["is_ap"] = False + if "ap_suffix" not in dataset_df.columns: + dataset_df["ap_suffix"] = "" + return self.dataset_implementation(dataset_df[dataset_col_order]) + def _get_dataset_dataframe(self): dataset_col_order = [ "dataset_size", @@ -92,6 +103,8 @@ def _get_dataset_dataframe(self): "dataset_name", "dataset_label", "dataset_domain", + "is_ap", + "ap_suffix", ] if len(self.datasets) == 0: @@ -126,7 +139,7 @@ def _get_dataset_dataframe(self): "domain": "dataset_name", } dataset_df = datasets.rename(columns=data_col_mapping) - if "dataset_size" not in dataset_df.columns: - dataset_df["dataset_size"] = None - dataset_df = self.dataset_implementation(dataset_df[dataset_col_order]) + dataset_df = self._ensure_required_columns( + dataset_df, dataset_col_order + ) return dataset_df diff --git a/cdisc_rules_engine/dataset_builders/dataset_metadata_values_builder.py b/cdisc_rules_engine/dataset_builders/dataset_metadata_values_builder.py index 202bbf31d..8fb48d9f3 100644 --- a/cdisc_rules_engine/dataset_builders/dataset_metadata_values_builder.py +++ b/cdisc_rules_engine/dataset_builders/dataset_metadata_values_builder.py @@ -16,6 +16,8 @@ def build(self): - dataset_location - Path to file - dataset_name - Name of the dataset - dataset_label - Label for the dataset + - is_ap - Whether the domain is an AP domain + - ap_suffix - The 2-character suffix from AP domains """ size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule) dataset_metadata = self.data_service.get_dataset_metadata( diff --git a/cdisc_rules_engine/models/sdtm_dataset_metadata.py b/cdisc_rules_engine/models/sdtm_dataset_metadata.py index bcec8c8d3..704a72cc9 100644 --- a/cdisc_rules_engine/models/sdtm_dataset_metadata.py +++ b/cdisc_rules_engine/models/sdtm_dataset_metadata.py @@ -12,21 +12,21 @@ class SDTMDatasetMetadata(DatasetMetadata): """ Examples - | name | unsplit_name | is_supp | domain | rdomain | - | -------- | ------------ | ------- | ------ | ------- | - | QS | QS | False | QS | None | - | QSX | QS | False | QS | None | - | QSXX | QS | False | QS | None | - | SUPPQS | SUPPQS | True | None | QS | - | SUPPQSX | SUPPQS | True | None | QS | - | SUPPQSXX | SUPPQS | True | None | QS | - | APQS | APQS | False | APQS | None | - | APQSX | APQS | False | APQS | None | - | APQSXX | APQS | False | APQS | None | - | SQAPQS | SQAPQS | True | None | APQS | - | SQAPQSX | SQAPQS | True | None | APQS | - | SQAPQSXX | SQAPQS | True | None | APQS | - | RELREC | RELREC | False | None | None | + | name | unsplit_name | is_supp | domain | rdomain | is_ap | ap_suffix | + | -------- | ------------ | ------- | ------ | ------- | ----- | --------- | + | QS | QS | False | QS | None | False | | + | QSX | QS | False | QS | None | False | | + | QSXX | QS | False | QS | None | False | | + | SUPPQS | SUPPQS | True | None | QS | False | | + | SUPPQSX | SUPPQS | True | None | QS | False | | + | SUPPQSXX | SUPPQS | True | None | QS | False | | + | APQS | APQS | False | APQS | None | True | QS | + | APQSX | APQS | False | APQS | None | True | QS | + | APQSXX | APQS | False | APQS | None | True | QS | + | SQAPQS | SQAPQS | True | None | APQS | True | | + | SQAPQSX | SQAPQS | True | None | APQS | True | | + | SQAPQSXX | SQAPQS | True | None | APQS | True | | + | RELREC | RELREC | False | None | None | False | | """ @property @@ -57,3 +57,32 @@ def unsplit_name(self) -> str: @property def is_split(self) -> bool: return self.name != self.unsplit_name + + @property + def is_ap(self) -> bool: + """ + Returns true if APID variable exists in first_record for non-supp datasets, + or if rdomain is exactly 4 characters and starts with AP for supp datasets. + """ + if self.is_supp: + return ( + isinstance(self.rdomain, str) + and len(self.rdomain) == 4 + and self.rdomain.startswith("AP") + ) + first_record = self.first_record or {} + return "APID" in first_record + + @property + def ap_suffix(self) -> str: + """ + Returns the 2-character suffix (characters 3-4) from AP domains. + Returns empty string if not an AP domain or for supp datasets. + """ + if not self.is_ap: + return "" + if self.is_supp: + return "" + if isinstance(self.domain, str) and len(self.domain) >= 4: + return self.domain[2:4] + return "" diff --git a/cdisc_rules_engine/operations/extract_metadata.py b/cdisc_rules_engine/operations/extract_metadata.py index b9c179a23..5709f22d3 100644 --- a/cdisc_rules_engine/operations/extract_metadata.py +++ b/cdisc_rules_engine/operations/extract_metadata.py @@ -1,4 +1,5 @@ import pandas as pd + from cdisc_rules_engine.operations.base_operation import BaseOperation diff --git a/cdisc_rules_engine/operations/operations_factory.py b/cdisc_rules_engine/operations/operations_factory.py index 801df7a08..d943144d1 100644 --- a/cdisc_rules_engine/operations/operations_factory.py +++ b/cdisc_rules_engine/operations/operations_factory.py @@ -30,6 +30,7 @@ from cdisc_rules_engine.operations.mean import Mean from cdisc_rules_engine.operations.domain_is_custom import DomainIsCustom from cdisc_rules_engine.operations.domain_label import DomainLabel +from cdisc_rules_engine.operations.standard_domains import StandardDomains from cdisc_rules_engine.operations.meddra_code_references_validator import ( MedDRACodeReferencesValidator, ) @@ -121,6 +122,7 @@ class OperationsFactory(FactoryInterface): "variable_is_null": VariableIsNull, "domain_is_custom": DomainIsCustom, "domain_label": DomainLabel, + "standard_domains": StandardDomains, "required_variables": RequiredVariables, "split_by": SplitBy, "expected_variables": ExpectedVariables, diff --git a/cdisc_rules_engine/operations/standard_domains.py b/cdisc_rules_engine/operations/standard_domains.py new file mode 100644 index 000000000..43b6d453b --- /dev/null +++ b/cdisc_rules_engine/operations/standard_domains.py @@ -0,0 +1,15 @@ +from cdisc_rules_engine.operations.base_operation import BaseOperation + + +class StandardDomains(BaseOperation): + def _execute_operation(self): + standard_data: dict = self.library_metadata.standard_metadata + domains = standard_data.get("domains", set()) + if isinstance(domains, (set, list, tuple)): + return sorted(list(domains)) + elif domains is None: + return [] + raise TypeError( + f"Invalid type for 'domains' in standard_metadata: " + f"expected set, list, or tuple, got {type(domains).__name__}" + ) diff --git a/cdisc_rules_engine/services/data_services/base_data_service.py b/cdisc_rules_engine/services/data_services/base_data_service.py index c88109ffc..6090e2e80 100644 --- a/cdisc_rules_engine/services/data_services/base_data_service.py +++ b/cdisc_rules_engine/services/data_services/base_data_service.py @@ -223,6 +223,9 @@ def get_dataset_metadata( "dataset_name": [dataset_metadata.name], "dataset_label": [dataset_metadata.label], "record_count": [dataset_metadata.record_count], + "is_ap": [dataset_metadata.is_ap], + "ap_suffix": [dataset_metadata.ap_suffix], + "domain": [dataset_metadata.domain], } return self.dataset_implementation.from_dict(metadata_to_return) @@ -243,38 +246,37 @@ def _handle_special_cases( if self._contains_topic_variable(dataset, dataset_metadata.domain, "OBJ"): return FINDINGS_ABOUT return FINDINGS - if self._is_associated_persons(dataset): + if dataset_metadata.is_ap: return self._get_associated_persons_inherit_class( - file_path, datasets, dataset_metadata.domain + file_path, datasets, dataset_metadata ) return None - def _is_associated_persons(self, dataset) -> bool: - """ - Check if AP-- domain. - """ - return "APID" in dataset - def _get_associated_persons_inherit_class( - self, file_path, datasets: Iterable[SDTMDatasetMetadata], domain: str + self, + file_path, + datasets: Iterable[SDTMDatasetMetadata], + dataset_metadata: SDTMDatasetMetadata, ): """ Check with inherit class AP-- belongs to. """ - ap_suffix = domain[2:] + ap_suffix = dataset_metadata.ap_suffix + if not ap_suffix: + return None directory_path = get_directory_path(file_path) if len(datasets) > 1: domain_details: SDTMDatasetMetadata = search_in_list_of_dicts( datasets, lambda item: item.domain == ap_suffix ) if domain_details: + if domain_details.is_ap: + raise ValueError("Nested Associated Persons domain reference") file_name = domain_details.filename new_file_path = os.path.join(directory_path, file_name) new_domain_dataset = self.get_dataset(dataset_name=new_file_path) else: raise ValueError("Filename for domain doesn't exist") - if self._is_associated_persons(new_domain_dataset): - raise ValueError("Nested Associated Persons domain reference") return self.get_dataset_class( new_domain_dataset, new_file_path, diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index c78236484..221b04c7f 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -35,7 +35,6 @@ from cdisc_rules_engine.utilities.utils import ( get_directory_path, get_operations_cache_key, - is_ap_domain, search_in_list_of_dicts, get_dataset_name_from_details, ) @@ -182,12 +181,7 @@ def _domain_matched_ap_or_supp( supp_ap_domains.update({f"{AP_DOMAIN}--", f"{APFA_DOMAIN}--"}) return any(set(domains_to_check).intersection(supp_ap_domains)) and ( - dataset_metadata.is_supp - or is_ap_domain( - dataset_metadata.domain - or dataset_metadata.rdomain - or dataset_metadata.name - ) + dataset_metadata.is_supp or dataset_metadata.is_ap ) def rule_applies_to_data_structure( diff --git a/resources/schema/Operations.json b/resources/schema/Operations.json index 88b35f19c..bbced69df 100644 --- a/resources/schema/Operations.json +++ b/resources/schema/Operations.json @@ -255,6 +255,13 @@ "required": ["id", "operator"], "type": "object" }, + { + "properties": { + "operator": { "const": "standard_domains" } + }, + "required": ["id", "operator"], + "type": "object" + }, { "properties": { "operator": { diff --git a/resources/schema/Operations.md b/resources/schema/Operations.md index 6214da388..290635f45 100644 --- a/resources/schema/Operations.md +++ b/resources/schema/Operations.md @@ -485,6 +485,28 @@ Output Laboratory Test Results ``` +### standard_domains + +Returns a list of valid SDTM domain names from the standard metadata. This can be used to compare extracted suffixes from DOMAIN values or dataset names. + +Input + +Product: sdtmig + +Version: 3-4 + +```yaml +Operations: + - operator: standard_domains + id: $valid_domain_names +``` + +Output + +``` +["AE", "CM", "DM", "FA", "LB", "QS", ...] +``` + ### extract_metadata Returns the requested dataset level metadata value for the current dataset. Possible name values are: @@ -493,6 +515,9 @@ Returns the requested dataset level metadata value for the current dataset. Poss - dataset_location - dataset_name - dataset_label +- domain +- is_ap +- ap_suffix Example @@ -512,6 +537,26 @@ Output: Laboratory Test Results ``` +Example: ap_suffix + +Extracts the domain suffix (characters 3-4) from AP-related domains. For example, "FA" from "APFA" DOMAIN value. + +Input: + +Target domain: APFA + +```yaml +- name: ap_suffix + operator: extract_metadata + id: $ap_suffix +``` + +Output: + +``` +FA +``` + ## IG & Model Variable Operations Operations for working with Implementation Guide and model variable metadata. diff --git a/tests/unit/test_operations/test_extract_metadata.py b/tests/unit/test_operations/test_extract_metadata.py index c5f4dbe1f..c8efd7d52 100644 --- a/tests/unit/test_operations/test_extract_metadata.py +++ b/tests/unit/test_operations/test_extract_metadata.py @@ -2,10 +2,11 @@ from cdisc_rules_engine.models.dataset.dask_dataset import DaskDataset from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset from cdisc_rules_engine.models.operation_params import OperationParams +from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.operations.extract_metadata import ExtractMetadata from cdisc_rules_engine.services.cache import InMemoryCacheService from cdisc_rules_engine.services.data_services import LocalDataService -from unittest.mock import Mock +from unittest.mock import Mock, MagicMock import pytest @@ -41,3 +42,109 @@ def test_extract_metadata_get_dataset_name( assert operation_params.operation_id in result for item in result[operation_params.operation_id]: assert item == "AE" + + +def _create_mock_service(dataset_name, first_record=None): + mock_service = Mock(LocalDataService) + raw_metadata = SDTMDatasetMetadata( + name=dataset_name, + first_record=first_record, + ) + mock_service.get_dataset_metadata.return_value = pd.DataFrame.from_dict( + { + "dataset_name": [dataset_name], + "ap_suffix": [raw_metadata.ap_suffix], + "is_ap": [raw_metadata.is_ap], + "domain": [raw_metadata.domain], + } + ) + mock_service.get_raw_dataset_metadata = MagicMock(return_value=raw_metadata) + return mock_service + + +@pytest.mark.parametrize( + "dataset_name, first_record, expected_suffix", + [ + ("APFA", None, ""), + ("APXX", None, ""), + ("APLB", None, ""), + ("", {"DOMAIN": "APFA", "APID": "AP001"}, "FA"), + ("AE", {"DOMAIN": "APFA", "APID": "AP001"}, "FA"), + ("AP", {"DOMAIN": "APFA", "APID": "AP001"}, "FA"), + ("APF", {"DOMAIN": "APFA", "APID": "AP001"}, "FA"), + ("AE", None, ""), + ("LB", None, ""), + ("AP", None, ""), + ("APF", None, ""), + ("AE", {"DOMAIN": "AE"}, ""), + ("AE", {"DOMAIN": "LB"}, ""), + ("AE", {"DOMAIN": "AP"}, ""), + ("AE", {"DOMAIN": ""}, ""), + ("AE", {"DOMAIN": None}, ""), + ], +) +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_extract_metadata_domain_suffix( + operation_params: OperationParams, + dataset_type, + dataset_name, + first_record, + expected_suffix, +): + mock_data_service = _create_mock_service(dataset_name, first_record) + domain_value = ( + first_record.get("DOMAIN") + if first_record and "DOMAIN" in first_record + else "AE" + ) + operation_params.dataframe = dataset_type.from_dict( + {"STUDYID": ["TEST_STUDY"], "DOMAIN": [domain_value]} + ) + operation_params.target = "ap_suffix" + cache = InMemoryCacheService.get_instance() + operation = ExtractMetadata( + operation_params, operation_params.dataframe, cache, mock_data_service + ) + result = operation.execute() + assert operation_params.operation_id in result + assert all( + item == expected_suffix for item in result[operation_params.operation_id] + ) + + +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_extract_metadata_domain_suffix_uses_domain( + operation_params: OperationParams, dataset_type +): + mock_data_service = _create_mock_service( + "APFA", {"DOMAIN": "APXX", "APID": "AP001"} + ) + operation_params.dataframe = dataset_type.from_dict( + {"STUDYID": ["TEST_STUDY"], "DOMAIN": ["APXX"]} + ) + operation_params.target = "ap_suffix" + cache = InMemoryCacheService.get_instance() + operation = ExtractMetadata( + operation_params, operation_params.dataframe, cache, mock_data_service + ) + result = operation.execute() + assert operation_params.operation_id in result + assert all(item == "XX" for item in result[operation_params.operation_id]) + + +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_extract_metadata_domain_suffix_empty_metadata( + operation_params: OperationParams, dataset_type +): + mock_data_service = _create_mock_service("APFA", None) + operation_params.dataframe = dataset_type.from_dict( + {"STUDYID": ["TEST_STUDY"], "DOMAIN": ["APFA"]} + ) + operation_params.target = "ap_suffix" + cache = InMemoryCacheService.get_instance() + operation = ExtractMetadata( + operation_params, operation_params.dataframe, cache, mock_data_service + ) + result = operation.execute() + assert operation_params.operation_id in result + assert all(item == "" for item in result[operation_params.operation_id]) diff --git a/tests/unit/test_operations/test_standard_domains.py b/tests/unit/test_operations/test_standard_domains.py new file mode 100644 index 000000000..605eba7ca --- /dev/null +++ b/tests/unit/test_operations/test_standard_domains.py @@ -0,0 +1,96 @@ +from cdisc_rules_engine.config.config import ConfigService +from cdisc_rules_engine.models.dataset.dask_dataset import DaskDataset +from cdisc_rules_engine.models.dataset.pandas_dataset import PandasDataset +from cdisc_rules_engine.models.library_metadata_container import ( + LibraryMetadataContainer, +) +import pytest +from cdisc_rules_engine.models.operation_params import OperationParams +from cdisc_rules_engine.operations.standard_domains import StandardDomains +from cdisc_rules_engine.services.cache import InMemoryCacheService +from cdisc_rules_engine.services.data_services import LocalDataService + + +def _create_operation(operation_params, standard_metadata, dataset_type): + operation_params.dataframe = dataset_type.from_dict( + {"STUDYID": ["TEST_STUDY"], "AETERM": ["test"]} + ) + operation_params.standard = "sdtmig" + operation_params.standard_version = "3-4" + cache = InMemoryCacheService.get_instance() + library_metadata = LibraryMetadataContainer(standard_metadata=standard_metadata) + data_service = LocalDataService.get_instance( + cache_service=cache, config=ConfigService() + ) + return StandardDomains( + operation_params, + operation_params.dataframe, + cache, + data_service, + library_metadata, + ) + + +@pytest.mark.parametrize( + "domains_input, expected_domains", + [ + ({"AE", "FA", "LB", "QS", "CM", "DM"}, ["AE", "CM", "DM", "FA", "LB", "QS"]), + (["AE", "FA", "LB"], ["AE", "FA", "LB"]), + (("AE", "FA", "LB"), ["AE", "FA", "LB"]), + (["QS", "AE", "FA", "LB", "CM"], ["AE", "CM", "FA", "LB", "QS"]), + (set(), []), + ([], []), + ], +) +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_standard_domains_returns_sorted_list( + operation_params: OperationParams, + dataset_type, + domains_input, + expected_domains, +): + standard_metadata = {"domains": domains_input} + operation = _create_operation(operation_params, standard_metadata, dataset_type) + result = operation.execute() + domain_list = result[operation_params.operation_id].iloc[0] + assert domain_list == expected_domains + + +@pytest.mark.parametrize( + "standard_metadata, expected_length", + [ + ({}, 0), + ({"domains": None}, 0), + ], +) +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_standard_domains_handles_missing_or_none_domains( + operation_params: OperationParams, + dataset_type, + standard_metadata, + expected_length, +): + operation = _create_operation(operation_params, standard_metadata, dataset_type) + result = operation.execute() + domain_list = result[operation_params.operation_id].iloc[0] + assert isinstance(domain_list, list) + assert len(domain_list) == expected_length + + +@pytest.mark.parametrize( + "standard_metadata", + [ + ({"domains": {}}), + ({"domains": 123}), + ({"domains": "invalid"}), + ], +) +@pytest.mark.parametrize("dataset_type", [PandasDataset, DaskDataset]) +def test_standard_domains_raises_error_for_invalid_type( + operation_params: OperationParams, + dataset_type, + standard_metadata, +): + operation = _create_operation(operation_params, standard_metadata, dataset_type) + with pytest.raises(TypeError): + operation.execute() diff --git a/tests/unit/test_services/test_data_service/test_data_service.py b/tests/unit/test_services/test_data_service/test_data_service.py index 00b4cb208..4d7ded960 100644 --- a/tests/unit/test_services/test_data_service/test_data_service.py +++ b/tests/unit/test_services/test_data_service/test_data_service.py @@ -245,7 +245,10 @@ def test_get_dataset_class_associated_domains(): datasets = [ SDTMDatasetMetadata(**dataset) for dataset in [ - {"first_record": {"DOMAIN": "APDM"}, "filename": "apdm.xpt"}, + { + "first_record": {"DOMAIN": "APDM", "APID": "AP001"}, + "filename": "apdm.xpt", + }, {"first_record": {"DOMAIN": "DM"}, "filename": "dm.xpt"}, ] ] diff --git a/tests/unit/test_utilities/test_rule_processor.py b/tests/unit/test_utilities/test_rule_processor.py index 3b8cc3466..e4184557a 100644 --- a/tests/unit/test_utilities/test_rule_processor.py +++ b/tests/unit/test_utilities/test_rule_processor.py @@ -60,8 +60,13 @@ ) def test_rule_applies_to_domain(mock_data_service, name, rule_metadata, outcome): processor = RuleProcessor(mock_data_service, InMemoryCacheService()) + first_record = None + if name in ("APTE", "APFASU", "APRELSUB"): + first_record = {"DOMAIN": name, "APID": "AP001"} assert ( - processor.rule_applies_to_domain(SDTMDatasetMetadata(name=name), rule_metadata) + processor.rule_applies_to_domain( + SDTMDatasetMetadata(name=name, first_record=first_record), rule_metadata + ) == outcome ) diff --git a/tests/unit/test_utilities/test_utils.py b/tests/unit/test_utilities/test_utils.py index fd32b9ac7..6917022c0 100644 --- a/tests/unit/test_utilities/test_utils.py +++ b/tests/unit/test_utilities/test_utils.py @@ -45,6 +45,68 @@ def test_is_supp_dataset(mock_dataset, expected): ), f"Expected {expected} but got {result} for datasets {mock_datasets}" +is_ap_tests = [ + ({"first_record": {"DOMAIN": "APFA", "APID": "AP001"}}, True), + ({"first_record": {"DOMAIN": "APXX", "APID": "AP002"}}, True), + ({"first_record": {"DOMAIN": "APQS", "APID": "AP003"}}, True), + ({"first_record": {"DOMAIN": "APFAMH", "APID": "AP004"}}, True), + ({"first_record": {"DOMAIN": "AE"}}, False), + ({"first_record": {"DOMAIN": "LB"}}, False), + ({"first_record": {"DOMAIN": "AP"}}, False), + ({"first_record": {"DOMAIN": "APF"}}, False), + ({"first_record": None}, False), + ({"first_record": {}}, False), + ({}, False), + ({"name": "SQAPQS", "first_record": {"RDOMAIN": "APQS"}}, True), + ({"name": "SQAPQSX", "first_record": {"RDOMAIN": "APQS"}}, True), + ({"name": "SQAPQSXX", "first_record": {"RDOMAIN": "APQS"}}, True), + ({"name": "SUPPQS", "first_record": {"RDOMAIN": "QS"}}, False), + ({"name": "SQAPQS", "first_record": {"RDOMAIN": "AP"}}, False), + ({"name": "SQAPQS", "first_record": {"RDOMAIN": "APF"}}, False), + ({"first_record": {"APID": "AP001"}}, True), + ({"first_record": {"DOMAIN": "AP", "APID": "AP001"}}, True), + ({"first_record": {"DOMAIN": "APF", "APID": "AP001"}}, True), +] + + +@pytest.mark.parametrize("mock_dataset, expected", is_ap_tests) +def test_is_ap_dataset(mock_dataset, expected): + result = SDTMDatasetMetadata(**mock_dataset).is_ap + assert ( + result == expected + ), f"Expected {expected} but got {result} for dataset {mock_dataset}" + + +ap_suffix_tests = [ + ({"first_record": {"DOMAIN": "APFA", "APID": "AP001"}}, "FA"), + ({"first_record": {"DOMAIN": "APXX", "APID": "AP002"}}, "XX"), + ({"first_record": {"DOMAIN": "APQS", "APID": "AP003"}}, "QS"), + ({"first_record": {"DOMAIN": "APLB", "APID": "AP004"}}, "LB"), + ({"first_record": {"DOMAIN": "APFAMH", "APID": "AP005"}}, "FA"), + ({"first_record": {"DOMAIN": "AE"}}, ""), + ({"first_record": {"DOMAIN": "LB"}}, ""), + ({"first_record": {"DOMAIN": "AP"}}, ""), + ({"first_record": {"DOMAIN": "APF"}}, ""), + ({"first_record": None}, ""), + ({"first_record": {}}, ""), + ({}, ""), + ({"name": "SQAPQS", "first_record": {"RDOMAIN": "APQS"}}, ""), + ({"name": "SQAPQSX", "first_record": {"RDOMAIN": "APQS"}}, ""), + ({"name": "SQAPQSXX", "first_record": {"RDOMAIN": "APQS"}}, ""), + ({"first_record": {"APID": "AP001"}}, ""), + ({"first_record": {"DOMAIN": "AP", "APID": "AP001"}}, ""), + ({"first_record": {"DOMAIN": "APF", "APID": "AP001"}}, ""), +] + + +@pytest.mark.parametrize("mock_dataset, expected", ap_suffix_tests) +def test_ap_suffix_property(mock_dataset, expected): + result = SDTMDatasetMetadata(**mock_dataset).ap_suffix + assert ( + result == expected + ), f"Expected {expected} but got {result} for dataset {mock_dataset}" + + datasets = [ SDTMDatasetMetadata(**dataset) for dataset in [