From 8807f61746f988569556dc3a9f254182291fed02 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Fri, 2 Jan 2026 12:02:15 -0500 Subject: [PATCH 1/4] Add define_dataset_variable_order with OrderNumber sorting support (#791) --- .../dataset_builders/base_dataset_builder.py | 6 +- .../contents_define_dataset_builder.py | 1 + ...dataset_metadata_define_dataset_builder.py | 40 ++++++++++++- .../define_item_group_dataset_builder.py | 1 + .../define_xml/base_define_xml_reader.py | 21 +++++++ resources/schema/MetaVariables.json | 6 ++ tests/unit/test_define_xml_reader.py | 59 +++++++++++++++++++ 7 files changed, 129 insertions(+), 5 deletions(-) diff --git a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py index e1e07b1b7..64d25fd32 100644 --- a/cdisc_rules_engine/dataset_builders/base_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/base_dataset_builder.py @@ -109,10 +109,10 @@ def get_dataset_contents(self, **kwargs): def get_define_xml_item_group_metadata_for_dataset( self, dataset_metadata: SDTMDatasetMetadata - ) -> List[dict]: + ) -> dict: """ Gets Define XML item group metadata - returns a list of dictionaries containing the following keys: + returns a dictionary containing the following keys: "define_dataset_name" "define_dataset_label" "define_dataset_location" @@ -120,6 +120,7 @@ def get_define_xml_item_group_metadata_for_dataset( "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" """ @@ -141,6 +142,7 @@ def get_define_xml_item_group_metadata_for_domain(self, domain: str) -> List[dic "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" """ diff --git a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py index b048e3a0e..a64fa8d9a 100644 --- a/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/contents_define_dataset_builder.py @@ -26,6 +26,7 @@ def build(self): define_dataset_name - dataset name from define_xml define_dataset_structure - dataset structure define_dataset_variables - list of variables in the dataset + define_dataset_variable_order - ordered list of variables in the dataset ..., """ diff --git a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py index 65b5eefb4..b52d3b395 100644 --- a/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/dataset_metadata_define_dataset_builder.py @@ -1,6 +1,9 @@ from cdisc_rules_engine.models.dataset import DatasetInterface from cdisc_rules_engine.services import logger from cdisc_rules_engine.dataset_builders.base_dataset_builder import BaseDatasetBuilder +from cdisc_rules_engine.services.define_xml.define_xml_reader_factory import ( + DefineXMLReaderFactory, +) import os import numpy as np @@ -28,8 +31,10 @@ def build(self): define_dataset_class - dataset class define_dataset_structure - dataset structure define_dataset_is_non_standard - whether a dataset is a standard - define_dataset_variables - dataset variables - define_dataset_key_sequence - dataset key sequence + define_dataset_variables - list of variables in the dataset + define_dataset_variable_order - ordered list of variables in the dataset + (ordered by OrderNumber if present, otherwise by XML document order) + define_dataset_key_sequence - ordered list of key sequence variables in the dataset ..., """ @@ -80,12 +85,41 @@ def _get_define_xml_dataframe(self): "define_dataset_class", "define_dataset_structure", "define_dataset_is_non_standard", + "define_dataset_variables", + "define_dataset_variable_order", + "define_dataset_key_sequence", ] define_metadata = self.get_define_metadata() if not define_metadata: logger.info(f"No define_metadata is provided for {__name__}.") return self.dataset_implementation(columns=define_col_order) - return self.dataset_implementation.from_records(define_metadata) + define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader( + self.dataset_path, self.define_xml_path, self.data_service, self.cache + ) + enriched_metadata = [] + for basic_metadata in define_metadata: + dataset_name = basic_metadata.get("define_dataset_name") + if dataset_name: + try: + full_metadata = define_xml_reader.extract_dataset_metadata( + dataset_name + ) + enriched_metadata.append(full_metadata) + except Exception as e: + logger.trace(e) + logger.error( + f"Error extracting metadata for {dataset_name}: {str(e)}" + ) + basic_metadata["define_dataset_variables"] = None + basic_metadata["define_dataset_variable_order"] = None + basic_metadata["define_dataset_key_sequence"] = None + enriched_metadata.append(basic_metadata) + else: + basic_metadata["define_dataset_variables"] = None + basic_metadata["define_dataset_variable_order"] = None + basic_metadata["define_dataset_key_sequence"] = None + enriched_metadata.append(basic_metadata) + return self.dataset_implementation.from_records(enriched_metadata) def _ensure_required_columns(self, dataset_df, dataset_col_order): if "dataset_size" not in dataset_df.columns: diff --git a/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py b/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py index 471b1259d..6838b2245 100644 --- a/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py +++ b/cdisc_rules_engine/dataset_builders/define_item_group_dataset_builder.py @@ -15,6 +15,7 @@ def build(self): "define_dataset_structure" "define_dataset_is_non_standard" "define_dataset_variables" + "define_dataset_variable_order" "define_dataset_key_sequence" """ item_group_metadata: List[dict] = ( diff --git a/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py b/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py index fb57c1ea5..5d08f235c 100644 --- a/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py +++ b/cdisc_rules_engine/services/define_xml/base_define_xml_reader.py @@ -120,6 +120,11 @@ def extract_dataset_metadata(self, dataset_name: Union[str, None] = None) -> dic for item in dataset_metadata.ItemRef if item.ItemOID in item_mapping ] + dataset_metadata_dict["define_dataset_variable_order"] = ( + self._get_ordered_dataset_variables( + dataset_metadata.ItemRef, item_mapping + ) + ) dataset_metadata_dict["define_dataset_key_sequence"] = ( self.get_dataset_key_sequence(dataset_name) ) @@ -148,6 +153,11 @@ def extract_domain_metadata(self, domain_name: str = None) -> dict: for item in domain_metadata.ItemRef if item.ItemOID in item_mapping ] + domain_metadata_dict["define_dataset_variable_order"] = ( + self._get_ordered_dataset_variables( + domain_metadata.ItemRef, item_mapping + ) + ) domain_metadata_dict["define_dataset_key_sequence"] = ( self.get_domain_key_sequence(domain_name) ) @@ -338,6 +348,17 @@ def _get_order_number(self, itemref, index): else: return index + 1 + def _get_ordered_dataset_variables(self, item_refs, item_mapping): + item_refs_with_order = [] + for index, item_ref in enumerate(item_refs): + if item_ref.ItemOID in item_mapping: + order_number = self._get_order_number(item_ref, index) + item_def = item_mapping.get(item_ref.ItemOID) + if item_def: + item_refs_with_order.append((order_number, item_def.Name)) + item_refs_with_order.sort(key=lambda x: x[0]) + return [var_name for _, var_name in item_refs_with_order] + def _get_item_def_representation(self, itemdef, itemref, codelists, index) -> dict: """ Returns item def as a dictionary diff --git a/resources/schema/MetaVariables.json b/resources/schema/MetaVariables.json index 2e3a690a7..2bc76e315 100644 --- a/resources/schema/MetaVariables.json +++ b/resources/schema/MetaVariables.json @@ -29,6 +29,12 @@ { "const": "define_dataset_structure" }, + { + "const": "define_dataset_variables" + }, + { + "const": "define_dataset_variable_order" + }, { "const": "define_variable_allowed_terms" }, diff --git a/tests/unit/test_define_xml_reader.py b/tests/unit/test_define_xml_reader.py index 7a9fbbee7..6fa11d071 100644 --- a/tests/unit/test_define_xml_reader.py +++ b/tests/unit/test_define_xml_reader.py @@ -109,6 +109,19 @@ def test_extract_domain_metadata(filename): "TSVCDREF", "TSVCDVER", ], + "define_dataset_variable_order": [ + "STUDYID", + "DOMAIN", + "TSSEQ", + "TSGRPID", + "TSPARMCD", + "TSPARM", + "TSVAL", + "TSVALNF", + "TSVALCD", + "TSVCDREF", + "TSVCDVER", + ], "define_dataset_key_sequence": ["STUDYID", "TSPARMCD", "TSVAL", "TSSEQ"], } @@ -370,3 +383,49 @@ def test_read_dictionary_version(dictionary_type, expected_version): reader = DefineXMLReaderFactory.from_file_contents(contents) version = reader.get_external_dictionary_version(dictionary_type) assert version == expected_version + + +@pytest.mark.parametrize( + "filename", [(test_define_file_path), (test_define_2_0_file_path)] +) +def test_extract_dataset_metadata_with_ordernumber(filename): + """ + Unit test for DefineXMLReader.extract_dataset_metadata function. + Tests that define_dataset_variable_order respects OrderNumber attributes. + """ + with open(filename, "rb") as file: + contents: bytes = file.read() + reader = DefineXMLReaderFactory.from_file_contents(contents) + dataset_metadata: dict = reader.extract_dataset_metadata(dataset_name="SE") + assert "define_dataset_variables" in dataset_metadata + assert "define_dataset_variable_order" in dataset_metadata + assert isinstance(dataset_metadata["define_dataset_variables"], list) + assert isinstance(dataset_metadata["define_dataset_variable_order"], list) + assert len(dataset_metadata["define_dataset_variables"]) == len( + dataset_metadata["define_dataset_variable_order"] + ) + assert dataset_metadata["define_dataset_variable_order"][0] == "STUDYID" + assert dataset_metadata["define_dataset_variable_order"][1] == "DOMAIN" + assert dataset_metadata["define_dataset_variable_order"][2] == "USUBJID" + assert dataset_metadata["define_dataset_variable_order"][3] == "SESEQ" + assert dataset_metadata["define_dataset_variable_order"][4] == "ETCD" + + +@pytest.mark.parametrize( + "filename", [(test_define_file_path), (test_define_2_0_file_path)] +) +def test_extract_dataset_metadata_without_ordernumber(filename): + """ + Unit test for DefineXMLReader.extract_dataset_metadata function. + Tests that define_dataset_variable_order uses XML document order when OrderNumber is not present. + """ + with open(filename, "rb") as file: + contents: bytes = file.read() + reader = DefineXMLReaderFactory.from_file_contents(contents) + dataset_metadata: dict = reader.extract_dataset_metadata(dataset_name="TS") + assert "define_dataset_variables" in dataset_metadata + assert "define_dataset_variable_order" in dataset_metadata + assert ( + dataset_metadata["define_dataset_variable_order"] + == dataset_metadata["define_dataset_variables"] + ) From 628757d524e1b9af946d005e72afdb51b01bc685 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Fri, 2 Jan 2026 13:40:09 -0500 Subject: [PATCH 2/4] Fix test_validate_dataset_metadata_against_define_xml by mocking DefineXMLReaderFactory --- tests/unit/test_rules_engine.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/unit/test_rules_engine.py b/tests/unit/test_rules_engine.py index c27468bfb..158f23289 100644 --- a/tests/unit/test_rules_engine.py +++ b/tests/unit/test_rules_engine.py @@ -1335,6 +1335,9 @@ def test_validate_single_dataset_not_equal_to( ), ], ) +@patch( + "cdisc_rules_engine.services.define_xml.define_xml_reader_factory.DefineXMLReaderFactory.get_define_xml_reader" +) @patch( "cdisc_rules_engine.dataset_builders.base_dataset_builder." + "BaseDatasetBuilder.get_define_metadata" @@ -1345,6 +1348,7 @@ def test_validate_single_dataset_not_equal_to( def test_validate_dataset_metadata_against_define_xml( mock_get_dataset_metadata: MagicMock, mock_get_define_xml_metadata_for_domain: MagicMock, + mock_get_define_xml_reader: MagicMock, define_xml_validation_rule: dict, define_xml_metadata: dict, dataset_mock: PandasDataset, @@ -1354,6 +1358,9 @@ def test_validate_dataset_metadata_against_define_xml( Unit test for Define XML validation. Creates an invalid dataset and validates it against Define XML. """ + mock_reader = MagicMock() + mock_reader.extract_dataset_metadata.side_effect = Exception("Mock exception") + mock_get_define_xml_reader.return_value = mock_reader mock_get_define_xml_metadata_for_domain.return_value = define_xml_metadata mock_get_dataset_metadata.return_value = dataset_mock From a45e1a3b6f6bb3a45940dc6017df19fd66f82186 Mon Sep 17 00:00:00 2001 From: Rakesh Date: Tue, 6 Jan 2026 10:06:44 -0500 Subject: [PATCH 3/4] Update test_extract_dataset_metadata to include define_dataset_variable_order field --- tests/unit/test_define_xml_reader.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/test_define_xml_reader.py b/tests/unit/test_define_xml_reader.py index 0a6acd58c..d8a763808 100644 --- a/tests/unit/test_define_xml_reader.py +++ b/tests/unit/test_define_xml_reader.py @@ -160,6 +160,19 @@ def test_extract_dataset_metadata(filename): "TSVCDREF", "TSVCDVER", ], + "define_dataset_variable_order": [ + "STUDYID", + "DOMAIN", + "TSSEQ", + "TSGRPID", + "TSPARMCD", + "TSPARM", + "TSVAL", + "TSVALNF", + "TSVALCD", + "TSVCDREF", + "TSVCDVER", + ], "define_dataset_key_sequence": ["STUDYID", "TSPARMCD", "TSVAL", "TSSEQ"], "define_dataset_has_no_data": False, } From fd3647ee33ab8715d2e2844d8ee0d08c4add1351 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 19 Feb 2026 13:43:03 -0500 Subject: [PATCH 4/4] converted issue templates to forms --- .github/ISSUE_TEMPLATE/1-bug.yml | 115 ++++++++++++++++++ .github/ISSUE_TEMPLATE/2-feature.yml | 109 +++++++++++++++++ .github/ISSUE_TEMPLATE/config.yml | 4 + .github/ISSUE_TEMPLATE/rule-blocking-bug.md | 35 ------ .../rule-blocking-enhancement.md | 35 ------ 5 files changed, 228 insertions(+), 70 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/1-bug.yml create mode 100644 .github/ISSUE_TEMPLATE/2-feature.yml create mode 100644 .github/ISSUE_TEMPLATE/config.yml delete mode 100644 .github/ISSUE_TEMPLATE/rule-blocking-bug.md delete mode 100644 .github/ISSUE_TEMPLATE/rule-blocking-enhancement.md diff --git a/.github/ISSUE_TEMPLATE/1-bug.yml b/.github/ISSUE_TEMPLATE/1-bug.yml new file mode 100644 index 000000000..8ca9442ef --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug.yml @@ -0,0 +1,115 @@ +name: Bug Report +description: An unexpected problem or behavior +projects: + - cdisc-org/19 +type: Bug +body: + - type: dropdown + id: standard + validations: + required: true + attributes: + label: Standard + multiple: true + options: + - CDISC ADaM + - CDISC Define-XML + - CDISC SDTM/SDTMIG + - CDISC SEND + - CDISC USDM + - FDA BR SDTM + - FDA BR SDTMIG + - FDA BR SEND + - FDA VR SDTMIG + - FDA VR SENDIG + - Custom + - type: input + id: reference_rule_id + validations: + required: true + attributes: + label: Reference Rule ID(s) + - type: input + id: core_id + attributes: + label: Conformance Rule ID(s) (if published in CORE) + value: CORE- + - type: input + id: jira + attributes: + label: JIRA Ticket + value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- + - type: markdown + attributes: + value: "In the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + - type: textarea + id: rule + validations: + required: true + attributes: + label: Rule YAML + render: YAML + value: | + Authorities: + - Organization: + Standards: + - Name: + References: + - Citations: + - Cited Guidance: + Document: + Item: + Section: + Origin: + Rule Identifier: + Id: + Version: + Version: + Version: + Check: + all: + - name: + operator: + value: + Core: + Id: + Status: + Version: '1' + Description: + Executability: Fully Executable + Match Datasets: + - Keys: + - + Name: + Operations: + - id: + operator: + Outcome: + Message: + Output Variables: + - + Rule Type: + Scope: + Sensitivity: + - type: textarea + id: test_data + validations: + required: true + attributes: + label: Sample Test Data File + - type: textarea + id: output + validations: + required: true + attributes: + label: Output report and/or log file + - type: textarea + id: expected + validations: + required: true + attributes: + label: Expected output + - type: textarea + id: additional_info + attributes: + label: Any Additional Information diff --git a/.github/ISSUE_TEMPLATE/2-feature.yml b/.github/ISSUE_TEMPLATE/2-feature.yml new file mode 100644 index 000000000..552d28d26 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature.yml @@ -0,0 +1,109 @@ +name: Rule-related Feature Request +description: A request for a new feature or enhancement related to a rule +projects: + - cdisc-org/19 +type: Feature +body: + - type: dropdown + id: standard + validations: + required: true + attributes: + label: Standard + multiple: true + options: + - CDISC ADaM + - CDISC Define-XML + - CDISC SDTM/SDTMIG + - CDISC SEND + - CDISC USDM + - FDA BR SDTM + - FDA BR SDTMIG + - FDA BR SEND + - FDA VR SDTMIG + - FDA VR SENDIG + - Custom + - type: input + id: reference_rule_id + validations: + required: true + attributes: + label: Reference Rule ID(s) + - type: input + id: core_id + attributes: + label: Conformance Rule ID(s) (if published in CORE) + value: CORE- + - type: input + id: jira + attributes: + label: JIRA Ticket + value: https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- + - type: markdown + attributes: + value: "In the next fields, please provide a [Minimal Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example)." + - type: textarea + id: rule + validations: + required: true + attributes: + label: Proposed Rule YAML + render: YAML + value: | + Authorities: + - Organization: + Standards: + - Name: + References: + - Citations: + - Cited Guidance: + Document: + Item: + Section: + Origin: + Rule Identifier: + Id: + Version: + Version: + Version: + Check: + all: + - name: + operator: + value: + Core: + Id: + Status: + Version: '1' + Description: + Executability: Fully Executable + Match Datasets: + - Keys: + - + Name: + Operations: + - id: + operator: + Outcome: + Message: + Output Variables: + - + Rule Type: + Scope: + Sensitivity: + - type: textarea + id: test_data + validations: + required: true + attributes: + label: Attach any sample test data file(s) + - type: textarea + id: expected + validations: + required: true + attributes: + label: Expected output + - type: textarea + id: additional_info + attributes: + label: Any Additional Information diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..19e409fdc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,4 @@ +contact_links: + - name: CORE Discussion Forum + url: https://github.com/cdisc-org/cdisc-rules-engine/discussions/categories/general + about: Please ask and answer general questions here. diff --git a/.github/ISSUE_TEMPLATE/rule-blocking-bug.md b/.github/ISSUE_TEMPLATE/rule-blocking-bug.md deleted file mode 100644 index 1d8072bca..000000000 --- a/.github/ISSUE_TEMPLATE/rule-blocking-bug.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Rule-Blocking Bug -about: For bug reports that block a rule -title: 'Rule blocked: CORERULES-###' -type: Bug -assignees: '' - ---- - -- Attach a .txt file with the JSON containing the full request. This can be copied from the Rule Editor under Test->Results->Request. -- Replace the "###" in the issue title with the primary JIRA ticket number for the rule. -- Fill in the following information - -**Links to related JIRA Tickets** -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - -**Rule Information** -- **Standard**: -- **Rule ID**: -- **Rule Description**: - -**Describe the bug** -A clear and concise description of what the bug is. - -**Error returned from Rule Engine** -```json -Replace this with the contents from the Rule Editor under Test->Results->Result. If there are no results, provide the error message -``` - -**Expected behavior** -A clear and concise description of what you expected to happen. - -**Screenshots** -If applicable, add screenshots to help explain your problem. diff --git a/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md b/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md deleted file mode 100644 index 953e75a6b..000000000 --- a/.github/ISSUE_TEMPLATE/rule-blocking-enhancement.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -name: Rule-Blocking Enhancement -about: For feature requests that block a rule -title: 'Rule blocked: CORERULES-###' -type: Feature -assignees: '' - ---- - -- Attach test dataset files that can be used to test the new feature -- Replace the "###" in the issue title with the primary JIRA ticket number for the rule. -- Fill in the following information - -**Links to related JIRA Tickets** -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- -- https://jira.cdisc.org/projects/CORERULES/issues/CORERULES- - -**Rule Information** -- **Standard**: -- **Rule ID**: -- **Rule Description**: - -**Describe the problem** -A clear and concise description of the problem. - -**Describe the solution** -A clear and concise description of how this problem could be solved. - -**Proposed rule logic** -```yaml -Replace this with an example of how the new solution could be written as a rule -``` - -**Screenshots** -If applicable, add screenshots to help explain your problem and solution.