diff --git a/.github/ISSUE_TEMPLATE/1-bug.yml b/.github/ISSUE_TEMPLATE/1-bug.yml new file mode 100644 index 000000000..8ca9442ef --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug.yml @@ -0,0 +1,115 @@ +name: Bug Report +description: An unexpected problem or behavior +projects: + - cdisc-org/19 +type: Bug +body: + - type: dropdown + id: standard + validations: + required: true + attributes: + label: Standard + multiple: true + options: + - CDISC ADaM + - CDISC Define-XML + - CDISC SDTM/SDTMIG + - CDISC SEND + - CDISC USDM + - FDA BR SDTM + - FDA BR SDTMIG + - FDA BR SEND + - FDA VR SDTMIG + - FDA VR SENDIG + - Custom + - type: input + id: reference_rule_id + validations: + required: true + attributes: + label: Reference Rule ID(s) + - type: input + id: core_id + attributes: + label: Conformance Rule ID(s) (if published in CORE) + value: CORE- + - type: input + id: jira + attributes: + label: JIRA Ticket + value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- + - type: markdown + attributes: + value: "In the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + - type: textarea + id: rule + validations: + required: true + attributes: + label: Rule YAML + render: YAML + value: | + Authorities: + - Organization: + Standards: + - Name: + References: + - Citations: + - Cited Guidance: + Document: + Item: + Section: + Origin: + Rule Identifier: + Id: + Version: + Version: + Version: + Check: + all: + - name: + operator: + value: + Core: + Id: + Status: + Version: '1' + Description: + Executability: Fully Executable + Match Datasets: + - Keys: + - + Name: + Operations: + - id: + operator: + Outcome: + Message: + Output Variables: + - + Rule Type: + Scope: + Sensitivity: + - type: textarea + id: test_data + validations: + required: true + attributes: + label: Sample Test Data File + - type: textarea + id: output + validations: + required: true + attributes: + label: Output report and/or log file + - type: textarea + id: expected + validations: + required: true + attributes: + label: Expected output + - type: textarea + id: additional_info + attributes: + label: Any Additional Information diff --git a/.github/ISSUE_TEMPLATE/2-feature.yml b/.github/ISSUE_TEMPLATE/2-feature.yml new file mode 100644 index 000000000..552d28d26 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature.yml @@ -0,0 +1,109 @@ +name: Rule-related Feature Request +description: A request for a new feature or enhancement related to a rule +projects: + - cdisc-org/19 +type: Feature +body: + - type: dropdown + id: standard + validations: + required: true + attributes: + label: Standard + multiple: true + options: + - CDISC ADaM + - CDISC Define-XML + - CDISC SDTM/SDTMIG + - CDISC SEND + - CDISC USDM + - FDA BR SDTM + - FDA BR SDTMIG + - FDA BR SEND + - FDA VR SDTMIG + - FDA VR SENDIG + - Custom + - type: input + id: reference_rule_id + validations: + required: true + attributes: + label: Reference Rule ID(s) + - type: input + id: core_id + attributes: + label: Conformance Rule ID(s) (if published in CORE) + value: CORE- + - type: input + id: jira + attributes: + label: JIRA Ticket + value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- + - type: markdown + attributes: + value: "In the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + - type: textarea + id: rule + validations: + required: true + attributes: + label: Proposed Rule YAML + render: YAML + value: | + Authorities: + - Organization: + Standards: + - Name: + References: + - Citations: + - Cited Guidance: + Document: + Item: + Section: + Origin: + Rule Identifier: + Id: + Version: + Version: + Version: + Check: + all: + - name: + operator: + value: + Core: + Id: + Status: + Version: '1' + Description: + Executability: Fully Executable + Match Datasets: + - Keys: + - + Name: + Operations: + - id: + operator: + Outcome: + Message: + Output Variables: + - + Rule Type: + Scope: + Sensitivity: + - type: textarea + id: test_data + validations: + required: true + attributes: + label: Attach any sample test data file(s) + - type: textarea + id: expected + validations: + required: true + attributes: + label: Expected output + - type: textarea + id: additional_info + attributes: + label: Any Additional Information diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..19e409fdc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: CORE Discussion Forum + url: https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/general + about: Please ask and answer general questions here. diff --git a/.github/ISSUE_TEMPLATE/rule-blocking-bug.md b/.github/ISSUE_TEMPLATE/rule-blocking-bug.md deleted file mode 100644 index 1d8072bca..000000000 --- a/.github/ISSUE_TEMPLATE/rule-blocking-bug.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Rule-Blocking Bug -about: For bug reports that block a rule -title: 'Rule blocked: CORERULES-###' -type: Bug -assignees: '' - ---- - -- Attach a .txt file with the JSON containing the full request. This can be copied from the Rule Editor under Test->Results->Request. -- Replace the "###" in the issue title with the primary JIRA ticket number for the rule. -- Fill in the following information - -**Links to related JIRA Tickets** -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - -**Rule Information** -- **Standard**: -- **Rule ID**: -- **Rule Description**: - -**Describe the bug** -A clear and concise description of what the bug is. - -**Error returned from Rule Engine** -```json -Replace this with the contents from the Rule Editor under Test->Results->Result. If there are no results, provide the error message -``` - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. diff --git a/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md b/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md deleted file mode 100644 index 953e75a6b..000000000 --- a/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Rule-Blocking Enhancement -about: For feature requests that block a rule -title: 'Rule blocked: CORERULES-###' -type: Feature -assignees: '' - ---- - -- Attach test dataset files that can be used to test the new feature -- Replace the "###" in the issue title with the primary JIRA ticket number for the rule. -- Fill in the following information - -**Links to related JIRA Tickets** -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - -**Rule Information** -- **Standard**: -- **Rule ID**: -- **Rule Description**: - -**Describe the problem** -A clear and concise description of the problem. - -**Describe the solution** -A clear and concise description of how this problem could be solved. - -**Proposed rule logic** -```yaml -Replace this with an example of how the new solution could be written as a rule -``` - -**Screenshots** -If applicable, add screenshots to help explain your problem and solution. diff --git a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py index ae67c499e..ad1bc81c4 100644 --- a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py @@ -109,10 +109,10 @@ def get_dataset_contents(self, **kwargs): def get_define_xml_item_group_metadata_for_dataset( self, dataset_metadata: SDTMDatasetMetadata - ) -> List[dict]: + ) -> dict: """ Gets Define XML item group metadata - returns a list of dictionaries containing the following keys: + returns a dictionary containing the following keys: "define_dataset_name" "define_dataset_label" "define_dataset_location" @@ -120,6 +120,7 @@ def get_define_xml_item_group_metadata_for_dataset( "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" "define_dataset_has_no_data" """ @@ -142,6 +143,7 @@ def get_define_xml_item_group_metadata_for_domain(self, domain: str) -> List[dic "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" "define_dataset_has_no_data" """ diff --git a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py index b048e3a0e..a64fa8d9a 100644 --- a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py @@ -26,6 +26,7 @@ def build(self): define_dataset_name - dataset name from define_xml define_dataset_structure - dataset structure define_dataset_variables - list of variables in the dataset + define_dataset_variable_order - ordered list of variables in the dataset ..., """ diff --git a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py index 78a25f61e..09cfb6a14 100644 --- a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py @@ -1,5 +1,8 @@ from cdisc_rules_engine.services import logger from cdisc_rules_engine.dataset_builders.base_dataset_builder import BaseDatasetBuilder +from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( + DefineXMLReaderFactory, +) import numpy as np @@ -27,9 +30,13 @@ def build(self): define_dataset_class - dataset class define_dataset_structure - dataset structure define_dataset_is_non_standard - whether a dataset is a standard - define_dataset_variables - dataset variables - define_dataset_key_sequence - dataset key sequence - define_dataset_has_no_data + define_dataset_variables - list of variables in the dataset + define_dataset_variable_order - ordered list of variables in the dataset + (ordered by OrderNumber if present, otherwise by XML document order) + define_dataset_key_sequence - ordered list of key sequence variables in the dataset + define_dataset_has_no_data - whether a dataset has no data + + ..., """ # 1. Build define xml dataframe define_df = self._get_define_xml_dataframe() @@ -63,13 +70,44 @@ def _get_define_xml_dataframe(self): "define_dataset_class", "define_dataset_structure", "define_dataset_is_non_standard", + "define_dataset_variables", + "define_dataset_variable_order", + "define_dataset_key_sequence", "define_dataset_has_no_data", ] define_metadata = self.get_define_metadata() if not define_metadata: logger.info(f"No define_metadata is provided for {__name__}.") return self.dataset_implementation(columns=define_col_order) - return self.dataset_implementation.from_records(define_metadata) + define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader( + self.dataset_path, self.define_xml_path, self.data_service, self.cache + ) + enriched_metadata = [] + for basic_metadata in define_metadata: + dataset_name = basic_metadata.get("define_dataset_name") + if dataset_name: + try: + full_metadata = define_xml_reader.extract_dataset_metadata( + dataset_name + ) + enriched_metadata.append(full_metadata) + except Exception as e: + logger.trace(e) + logger.error( + f"Error extracting metadata for {dataset_name}: {str(e)}" + ) + basic_metadata["define_dataset_variables"] = None + basic_metadata["define_dataset_variable_order"] = None + basic_metadata["define_dataset_key_sequence"] = None + basic_metadata["define_dataset_has_no_data"] = None + enriched_metadata.append(basic_metadata) + else: + basic_metadata["define_dataset_variables"] = None + basic_metadata["define_dataset_variable_order"] = None + basic_metadata["define_dataset_key_sequence"] = None + basic_metadata["define_dataset_has_no_data"] = None + enriched_metadata.append(basic_metadata) + return self.dataset_implementation.from_records(enriched_metadata) def _ensure_required_columns(self, dataset_df, dataset_col_order): if "dataset_size" not in dataset_df.columns: diff --git a/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py b/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py index 8c926b3bb..26f93e884 100644 --- a/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py @@ -15,6 +15,7 @@ def build(self): "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" "define_dataset_has_no_data" """ diff --git a/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py b/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py index fb57c1ea5..5d08f235c 100644 --- a/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py +++ b/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py @@ -120,6 +120,11 @@ def extract_dataset_metadata(self, dataset_name: Union[str, None] = None) -> dic for item in dataset_metadata.ItemRef if item.ItemOID in item_mapping ] + dataset_metadata_dict["define_dataset_variable_order"] = ( + self._get_ordered_dataset_variables( + dataset_metadata.ItemRef, item_mapping + ) + ) dataset_metadata_dict["define_dataset_key_sequence"] = ( self.get_dataset_key_sequence(dataset_name) ) @@ -148,6 +153,11 @@ def extract_domain_metadata(self, domain_name: str = None) -> dict: for item in domain_metadata.ItemRef if item.ItemOID in item_mapping ] + domain_metadata_dict["define_dataset_variable_order"] = ( + self._get_ordered_dataset_variables( + domain_metadata.ItemRef, item_mapping + ) + ) domain_metadata_dict["define_dataset_key_sequence"] = ( self.get_domain_key_sequence(domain_name) ) @@ -338,6 +348,17 @@ def _get_order_number(self, itemref, index): else: return index + 1 + def _get_ordered_dataset_variables(self, item_refs, item_mapping): + item_refs_with_order = [] + for index, item_ref in enumerate(item_refs): + if item_ref.ItemOID in item_mapping: + order_number = self._get_order_number(item_ref, index) + item_def = item_mapping.get(item_ref.ItemOID) + if item_def: + item_refs_with_order.append((order_number, item_def.Name)) + item_refs_with_order.sort(key=lambda x: x[0]) + return [var_name for _, var_name in item_refs_with_order] + def _get_item_def_representation(self, itemdef, itemref, codelists, index) -> dict: """ Returns item def as a dictionary diff --git a/resources/schema/rule/MetaVariables.json b/resources/schema/rule/MetaVariables.json index 2e3a690a7..2bc76e315 100644 --- a/resources/schema/rule/MetaVariables.json +++ b/resources/schema/rule/MetaVariables.json @@ -29,6 +29,12 @@ { "const": "define_dataset_structure" }, + { + "const": "define_dataset_variables" + }, + { + "const": "define_dataset_variable_order" + }, { "const": "define_variable_allowed_terms" }, diff --git a/tests/unit/test_define_xml_reader.py b/tests/unit/test_define_xml_reader.py index bf7b16516..d8a763808 100644 --- a/tests/unit/test_define_xml_reader.py +++ b/tests/unit/test_define_xml_reader.py @@ -110,6 +110,19 @@ def test_extract_domain_metadata(filename): "TSVCDREF", "TSVCDVER", ], + "define_dataset_variable_order": [ + "STUDYID", + "DOMAIN", + "TSSEQ", + "TSGRPID", + "TSPARMCD", + "TSPARM", + "TSVAL", + "TSVALNF", + "TSVALCD", + "TSVCDREF", + "TSVCDVER", + ], "define_dataset_key_sequence": ["STUDYID", "TSPARMCD", "TSVAL", "TSSEQ"], "define_dataset_has_no_data": False, } @@ -147,6 +160,19 @@ def test_extract_dataset_metadata(filename): "TSVCDREF", "TSVCDVER", ], + "define_dataset_variable_order": [ + "STUDYID", + "DOMAIN", + "TSSEQ", + "TSGRPID", + "TSPARMCD", + "TSPARM", + "TSVAL", + "TSVALNF", + "TSVALCD", + "TSVCDREF", + "TSVCDVER", + ], "define_dataset_key_sequence": ["STUDYID", "TSPARMCD", "TSVAL", "TSSEQ"], "define_dataset_has_no_data": False, } @@ -209,7 +235,7 @@ def test_extract_variable_metadata(filename): assert variable["define_variable_order_number"] == index + 1 -@pytest.mark.parametrize("filename", [(test_define_file_path)]) +@pytest.mark.parametrize("filename", [test_define_file_path]) def test_extract_variable_metadata_when_one_ordernumber_non_1_based(filename): """ Unit test for DefineXMLReader.extract_domain_metadata function. @@ -434,3 +460,49 @@ def test_extract_domain_metadata_nv_has_no_data(filename, has_no_data): # Check that at least one expected variable is present for v in ["NVSEQ", "NVTESTCD", "NVTEST"]: assert v in domain_metadata["define_dataset_variables"] + + +@pytest.mark.parametrize( + "filename", [(test_define_file_path), (test_define_2_0_file_path)] +) +def test_extract_dataset_metadata_with_ordernumber(filename): + """ + Unit test for DefineXMLReader.extract_dataset_metadata function. + Tests that define_dataset_variable_order respects OrderNumber attributes. + """ + with open(filename, "rb") as file: + contents: bytes = file.read() + reader = DefineXMLReaderFactory.from_file_contents(contents) + dataset_metadata: dict = reader.extract_dataset_metadata(dataset_name="SE") + assert "define_dataset_variables" in dataset_metadata + assert "define_dataset_variable_order" in dataset_metadata + assert isinstance(dataset_metadata["define_dataset_variables"], list) + assert isinstance(dataset_metadata["define_dataset_variable_order"], list) + assert len(dataset_metadata["define_dataset_variables"]) == len( + dataset_metadata["define_dataset_variable_order"] + ) + assert dataset_metadata["define_dataset_variable_order"][0] == "STUDYID" + assert dataset_metadata["define_dataset_variable_order"][1] == "DOMAIN" + assert dataset_metadata["define_dataset_variable_order"][2] == "USUBJID" + assert dataset_metadata["define_dataset_variable_order"][3] == "SESEQ" + assert dataset_metadata["define_dataset_variable_order"][4] == "ETCD" + + +@pytest.mark.parametrize( + "filename", [(test_define_file_path), (test_define_2_0_file_path)] +) +def test_extract_dataset_metadata_without_ordernumber(filename): + """ + Unit test for DefineXMLReader.extract_dataset_metadata function. + Tests that define_dataset_variable_order uses XML document order when OrderNumber is not present. + """ + with open(filename, "rb") as file: + contents: bytes = file.read() + reader = DefineXMLReaderFactory.from_file_contents(contents) + dataset_metadata: dict = reader.extract_dataset_metadata(dataset_name="TS") + assert "define_dataset_variables" in dataset_metadata + assert "define_dataset_variable_order" in dataset_metadata + assert ( + dataset_metadata["define_dataset_variable_order"] + == dataset_metadata["define_dataset_variables"] + ) diff --git a/tests/unit/test_rules_engine.py b/tests/unit/test_rules_engine.py index 62a4bd040..12f78466b 100644 --- a/tests/unit/test_rules_engine.py +++ b/tests/unit/test_rules_engine.py @@ -1335,6 +1335,9 @@ def test_validate_single_dataset_not_equal_to( ), ], ) +@patch( + "cdisc_rules_engine.services.define_xml.define_xml_reader_factory.DefineXMLReaderFactory.get_define_xml_reader" +) @patch( "cdisc_rules_engine.dataset_builders.base_dataset_builder." + "BaseDatasetBuilder.get_define_metadata" @@ -1345,6 +1348,7 @@ def test_validate_single_dataset_not_equal_to( def test_validate_dataset_metadata_against_define_xml( mock_get_dataset_metadata: MagicMock, mock_get_define_xml_metadata_for_domain: MagicMock, + mock_get_define_xml_reader: MagicMock, define_xml_validation_rule: dict, define_xml_metadata: dict, dataset_mock: PandasDataset, @@ -1354,6 +1358,9 @@ def test_validate_dataset_metadata_against_define_xml( Unit test for Define XML validation. Creates an invalid dataset and validates it against Define XML. """ + mock_reader = MagicMock() + mock_reader.extract_dataset_metadata.side_effect = Exception("Mock exception") + mock_get_define_xml_reader.return_value = mock_reader mock_get_define_xml_metadata_for_domain.return_value = define_xml_metadata mock_get_dataset_metadata.return_value = dataset_mock