-
Notifications
You must be signed in to change notification settings - Fork 27
1421: load variables ccode correctly #1449
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bc2a46c
ef917ab
bbd5e1a
f855e80
8be107f
c05b5cd
a886c98
cab8c72
8f00238
0735172
9602ad4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,7 +9,7 @@ | |
| get_corresponding_datasets, | ||
| tag_source, | ||
| ) | ||
| from typing import List, Iterable | ||
| from typing import List, Iterable, Optional | ||
| from cdisc_rules_engine.utilities import sdtm_utilities | ||
| from cdisc_rules_engine.utilities.rule_processor import RuleProcessor | ||
| from cdisc_rules_engine.models.dataset.dataset_interface import DatasetInterface | ||
|
|
@@ -156,9 +156,17 @@ def get_define_xml_variables_metadata(self) -> List[dict]: | |
| define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader( | ||
| self.dataset_path, self.define_xml_path, self.data_service, self.cache | ||
| ) | ||
| return define_xml_reader.extract_variables_metadata( | ||
| domain_name=self.dataset_metadata.domain | ||
| ) | ||
| # If domain is not set and this is a SUPP domain, use rdomain | ||
| domain = self.dataset_metadata.domain | ||
| if not domain and getattr(self.dataset_metadata, "is_supp", False): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the problem I described in my comment above. "the other issue I see would occur downstream from this: _get_domain_metadata() in base_define_xml__reader is looking only based on domain and grabbing the first that matches. This will be an issue when differentiating Supp-- from Parents." |
||
| domain = getattr(self.dataset_metadata, "rdomain", None) | ||
| name = getattr(self.dataset_metadata, "name", None) | ||
| return define_xml_reader.extract_variables_metadata( | ||
| domain_name=domain, name=name | ||
| ) | ||
| if not domain: | ||
| return [] | ||
| return define_xml_reader.extract_variables_metadata(domain_name=domain) | ||
|
|
||
| def get_define_xml_value_level_metadata(self) -> List[dict]: | ||
| """ | ||
|
|
@@ -205,13 +213,16 @@ def get_library_variables_metadata(self) -> DatasetInterface: | |
| variables: List[dict] = sdtm_utilities.get_variables_metadata_from_standard( | ||
| domain=domain, library_metadata=self.library_metadata | ||
| ) | ||
| variables_metadata: dict = self.library_metadata.variables_metadata.get( | ||
| domain, {} | ||
| ) | ||
| for variable in variables: | ||
| variable["ccode"] = "" | ||
| if variable.get("codelistSubmissionValues"): | ||
| variable_metadata: Optional[dict] = variables_metadata.get(variable["name"]) | ||
| if variable_metadata: | ||
| if "_links" in variable and "codelist" in variable["_links"]: | ||
| first_codelist = variable["_links"]["codelist"][0] | ||
| href = first_codelist["href"] | ||
| codelist_code = href.split("/")[-1] | ||
| codelist_code = first_codelist["href"].split("/")[-1] | ||
| variable["ccode"] = codelist_code | ||
| if "role" not in variable: | ||
| variable["role"] = "" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -335,8 +335,6 @@ def validate_rule( | |
| elif ( | ||
| rule.get("rule_type") | ||
| == RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE.value | ||
| or rule.get("rule_type") | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you please add details to the PR explaining why this check was removed and summarize your overall work?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After I fixed the dataset builder to correctly load I've started diving in and noticed that after calling
Pay attention to the conditions 2 and 3 - comparator values become |
||
| == RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE_XML_AND_LIBRARY.value | ||
| ): | ||
| self.rule_processor.add_comparator_to_rule_conditions( | ||
| rule, comparator=None, target_prefix="define_" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,131 @@ | ||
| import os | ||
| import subprocess | ||
| import openpyxl | ||
| import pytest | ||
| from conftest import get_python_executable | ||
| from QARegressionTests.globals import ( | ||
| dataset_details_sheet, | ||
| issue_datails_sheet, | ||
| rules_report_sheet, | ||
| issue_sheet_variable_column, | ||
| issue_sheet_coreid_column, | ||
| ) | ||
|
|
||
|
|
||
| @pytest.mark.regression | ||
| def test_validate_define_xml_against_lib_metadata(): | ||
| command = [ | ||
| f"{get_python_executable()}", | ||
| "-m", | ||
| "core", | ||
| "validate", | ||
| "-s", | ||
| "sdtmig", | ||
| "-v", | ||
| "3-4", | ||
| "-dp", | ||
| os.path.join( | ||
| "tests", | ||
| "resources", | ||
| "CoreIssue1421", | ||
| "Dataset.json", | ||
| ), | ||
| "-lr", | ||
| os.path.join("tests", "resources", "CoreIssue1421", "Rule.yml"), | ||
| "-dxp", | ||
| os.path.join("tests", "resources", "CoreIssue1421", "Define.xml"), | ||
| ] | ||
| subprocess.run(command, check=True) | ||
|
|
||
| # Get the latest created Excel file | ||
| files = os.listdir() | ||
| excel_files = [ | ||
| file | ||
| for file in files | ||
| if file.startswith("CORE-Report-") and file.endswith(".xlsx") | ||
| ] | ||
| excel_file_path = sorted(excel_files)[-1] | ||
| # Open the Excel file | ||
| workbook = openpyxl.load_workbook(excel_file_path) | ||
|
|
||
| # Go to the "Issue Details" sheet | ||
| sheet = workbook[issue_datails_sheet] | ||
|
|
||
| variables_values_column = sheet[issue_sheet_variable_column] | ||
| variables_values = [ | ||
| cell.value for cell in variables_values_column[1:] if cell.value is not None | ||
| ] | ||
| assert len(variables_values) == 1 | ||
| for value in variables_values: | ||
| assert len(value.split(",")) == 6 | ||
|
|
||
| variables_names_column = sheet["H"] | ||
| variables_names_values = [ | ||
| cell.value for cell in variables_names_column[1:] if cell.value is not None | ||
| ] | ||
| assert len(variables_names_values) == 1 | ||
| for value in variables_names_values: | ||
| assert len(value.split(",")) == 6 | ||
|
|
||
| dataset_column = sheet["D"] | ||
| dataset_column_values = [ | ||
| cell.value for cell in dataset_column[1:] if cell.value is not None | ||
| ] | ||
| assert sorted(set(dataset_column_values)) == ["dm.xpt"] | ||
|
|
||
| core_id_column = sheet[issue_sheet_coreid_column] | ||
| core_id_column_values = [ | ||
| cell.value for cell in core_id_column[1:] if cell.value is not None | ||
| ] | ||
| assert set(core_id_column_values) == {"CDISC.SDTMIG.CG0999"} | ||
|
|
||
| # Go to the "Rules Report" sheet | ||
| rules_values = [ | ||
| row for row in workbook[rules_report_sheet].iter_rows(values_only=True) | ||
| ][1:] | ||
| rules_values = [row for row in rules_values if any(row)] | ||
| assert rules_values[0][0] == "CDISC.SDTMIG.CG0999" | ||
| assert "SUCCESS" in rules_values[0] | ||
| assert ( | ||
| rules_values[0][4] | ||
| == "Issue with codelist definition in the Define-XML document." | ||
| ) | ||
|
|
||
| # Go to the "Dataset Details" sheet | ||
| dataset_sheet = workbook[dataset_details_sheet] | ||
| dataset_values = [row for row in dataset_sheet.iter_rows(values_only=True)][1:] | ||
| dataset_values = [row for row in dataset_values if any(row)] | ||
| assert len(dataset_values) > 0 | ||
| dataset_names = set(row[0] for row in dataset_values if row[0] is not None) | ||
| assert dataset_names == {"ae.xpt", "dm.xpt", "ec.xpt", "ex.xpt", "suppec.xpt"} | ||
| expected_records = { | ||
| "ae.xpt": 74, | ||
| "dm.xpt": 18, | ||
| "ec.xpt": 1590, | ||
| "ex.xpt": 1583, | ||
| "suppec.xpt": 13, | ||
| } | ||
| for row in dataset_values: | ||
| dataset_name = row[0] | ||
| records_count = row[-1] | ||
| assert records_count == expected_records[dataset_name] | ||
|
|
||
| # Go to the "Issue Summary" sheet | ||
| issue_summary_sheet = workbook["Issue Summary"] | ||
| summary_values = [row for row in issue_summary_sheet.iter_rows(values_only=True)][ | ||
| 1: | ||
| ] | ||
| summary_values = [row for row in summary_values if any(row)] | ||
| assert len(summary_values) == 1 | ||
| core_ids = set(row[1] for row in summary_values if row[1] is not None) | ||
| assert core_ids == {"CDISC.SDTMIG.CG0999"} | ||
| # Check Message and dataset columns | ||
| assert ( | ||
| summary_values[0][2] | ||
| == "Issue with codelist definition in the Define-XML document." | ||
| ) | ||
| assert summary_values[0][0] == "dm.xpt" | ||
|
|
||
| # Delete the excel file | ||
| if os.path.exists(excel_file_path): | ||
| os.remove(excel_file_path) |


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am getting an error when I run Marcelina's data.
On line 159
extract_variables_metadata is being called with the None for domain for SUPPEC. We need a check for self.dataset_metadata.is_supp() and if so call extract_variables_metadata with the rdomain property. You could also do a check if domain exists, and if not, use rdomain.
the other issue I see would occur downstream from this: _get_domain_metadata() in base_define_xml__reader is looking only based on domain and grabbing the first that matches. This will be an issue when differentiating Supp-- from Parents.

I suspect passing the name and the domain/rdomain may be necessary and using the Name and the Domain property of the itemgroupdef to pick which metadata to grab would be best plan of attack