Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions cdisc_rules_engine/models/library_metadata_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,14 @@ def build_ct_lists(self, ct_package_type: str, versions: str | Iterable[str]):
"extensible": [],
}
for version in {*versions}:
ct_package_data = self._load_ct_package_data(ct_package_type, version)
for codelist_code, codelist in ct_package_data.items():
ct_package_data = self._load_ct_package_data(ct_package_type, version).get(
"codelists", []
)
for codelist in ct_package_data:
if isinstance(codelist, dict) and "terms" in codelist:
ct_lists["ct_package_type"].append(ct_package_type)
ct_lists["version"].append(version)
ct_lists["codelist_code"].append(codelist_code)
ct_lists["codelist_code"].append(codelist.get("conceptId"))
ct_lists["extensible"].append(codelist.get("extensible"))
return ct_lists

Expand All @@ -123,14 +125,16 @@ def build_ct_terms(self, ct_package_type: str, versions: str | Iterable[str]):
"term_pref_term": [],
}
for version in {*versions}:
ct_package_data = self._load_ct_package_data(ct_package_type, version)
for codelist_code, codelist in ct_package_data.items():
ct_package_data = self._load_ct_package_data(ct_package_type, version).get(
"codelists", []
)
for codelist in ct_package_data:
for term in (
codelist.get("terms", []) if isinstance(codelist, dict) else []
):
ct_terms["ct_package_type"].append(ct_package_type)
ct_terms["version"].append(version)
ct_terms["codelist_code"].append(codelist_code)
ct_terms["codelist_code"].append(codelist.get("conceptId"))
ct_terms["term_code"].append(term["conceptId"])
ct_terms["term_value"].append(term["submissionValue"])
ct_terms["term_pref_term"].append(term.get("preferredTerm"))
Expand Down
23 changes: 14 additions & 9 deletions cdisc_rules_engine/operations/codelist_extensible.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,25 @@ def _handle_multiple_versions(self) -> pd.Series:
return is_extensible["extensible"]

def _handle_single_version(self) -> pd.Series:
codelist = self.params.codelist
codelist_name = self.params.codelist
ct_packages = self.library_metadata._ct_package_metadata
if "define_XML_merged_CT" in ct_packages:
ct_package_data = ct_packages["define_XML_merged_CT"]
else:
ct_package_data = next(
(pkg for name, pkg in ct_packages.items() if name != "extensible")
)
code_obj = ct_package_data["submission_lookup"].get(codelist, None)
if code_obj is None:
raise MissingDataError(f"Codelist '{codelist}' not found in metadata")
codelist_id = code_obj.get("codelist")
is_extensible = False
if codelist_id in ct_package_data:
codelist_info = ct_package_data[codelist_id]
is_extensible = codelist_info.get("extensible")
try:
codelist = next(
iter(
[
codelist
for codelist in ct_package_data.get("codelists", [])
if codelist.get("submissionValue") == codelist_name
]
)
)
except StopIteration:
raise MissingDataError(f"Codelist '{codelist_name}' not found in metadata")
is_extensible = codelist.get("extensible")
return is_extensible
72 changes: 34 additions & 38 deletions cdisc_rules_engine/operations/codelist_terms.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ def _handle_single_version(self) -> pd.Series:
using the list from comparator and the codelist map.
Returns a Series of booleans indicating whether each value is valid.
"""
codelists = self.params.codelists
codelist_names = self.params.codelists
codelist_level = self.params.level
check = self.params.returntype
codes = []
codelists = []
try:
ct_packages = self.library_metadata._ct_package_metadata
if "define_XML_merged_CT" in ct_packages:
Expand All @@ -122,47 +122,43 @@ def _handle_single_version(self) -> pd.Series:
"-- a valid define.xml file or -ct command is required to execute",
e,
)
submission_lookup = ct_package_data["submission_lookup"]
lookup_map = {k.lower(): k for k in submission_lookup.keys()}
for codelist in codelists:
original_key = lookup_map.get(codelist.lower())
if original_key is None:
raise MissingDataError(f"Codelist '{codelist}' not found in metadata")
code_obj = submission_lookup[original_key]
codes.append(code_obj)
values = []

for code_obj in codes:
values.extend(
self._get_codelist_values(
code_obj, ct_package_data, codelist_level, check
submission_lookup = {
codelist["submissionValue"].lower(): codelist
for codelist in ct_package_data.get("codelists", [])
if "submissionValue" in codelist
}
for codelist_name in codelist_names:
code_obj = submission_lookup.get(codelist_name.lower())
if code_obj is None:
raise MissingDataError(
f"Codelist '{codelist_name}' not found in metadata"
)
)
codelists.append(code_obj)
values = [
value
for codelist in codelists
for value in self._get_codelist_values(codelist, codelist_level, check)
]
return values

def _get_codelist_values(
self, code_obj: dict, ct_package_data: dict, codelist_level: str, check: str
self, codelist: dict, codelist_level: str, check: str
) -> list:
"""Extract values from a codelist based on level and check type."""
values = []
codelist_id = code_obj.get("codelist")
if codelist_id in ct_package_data:
codelist_info = ct_package_data[codelist_id]
if codelist_level == "codelist":
if code_obj.get("term") == "N/A":
if check == "code":
values.append(codelist_id)
elif check == "pref_term":
values.append(codelist_info["preferredTerm"])
else:
values.append(codelist_info["submissionValue"])
elif codelist_level == "term":
terms = codelist_info.get("terms", [])
for term in terms:
if check == "value":
values.append(term["submissionValue"])
elif check == "pref_term":
values.append(term["preferredTerm"])
else:
values.append(term["conceptId"])
if codelist_level == "codelist":
if check == "code":
values.append(codelist["conceptId"])
elif check == "pref_term":
values.append(codelist["preferredTerm"])
else:
values.append(codelist["submissionValue"])
elif codelist_level == "term":
for term in codelist.get("terms", []):
if check == "value":
values.append(term["submissionValue"])
elif check == "pref_term":
values.append(term["preferredTerm"])
else:
values.append(term["conceptId"])
return values
87 changes: 19 additions & 68 deletions cdisc_rules_engine/operations/get_codelist_attributes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
from cdisc_rules_engine.operations.base_operation import BaseOperation
from cdisc_rules_engine.models.dataset import DaskDataset
from jsonpath_ng.ext import parse


def _get_ct_package_dask(
Expand Down Expand Up @@ -207,72 +208,22 @@ def _add_codelist(self, ct_key, ct_val, ct_term_maps, ct_result):
def _extract_codes_by_attribute(
self, ct_package_data: dict, ct_attribute: str
) -> set:
submission_lookup = ct_package_data.get("submission_lookup", {})

if ct_attribute == "Term CCODE":
return self._extract_term_codes(submission_lookup)
elif ct_attribute == "Codelist CCODE":
return self._extract_codelist_codes(submission_lookup)
elif ct_attribute in ("Term Value", "Term Submission Value"):
return self._extract_term_values(submission_lookup)
elif ct_attribute == "Codelist Value":
return self._extract_codelist_values(submission_lookup)
elif ct_attribute == "Term Preferred Term":
return self._extract_preferred_terms(submission_lookup, ct_package_data)
else:
attribute_name_map = {
"Codelist CCODE": "$.codelists[*].conceptId",
"Codelist Value": "$.codelists[*].submissionValue",
"Term CCODE": "$.codelists[*].terms[*].conceptId",
"Term Value": "$.codelists[*].terms[*].submissionValue",
"Term Submission Value": "$.codelists[*].terms[*].submissionValue",
"Term Preferred Term": "$.codelists[*].terms[*].preferredTerm",
}
if ct_attribute not in attribute_name_map:
raise ValueError(f"Unsupported ct_attribute: {ct_attribute}")

def _extract_codelist_values(self, submission_lookup: dict) -> set:
codes = set()
for term_name, term_data in submission_lookup.items():
term_code = term_data.get("term")
if term_code and term_code == "N/A":
codes.add(term_name)
return codes

def _extract_term_codes(self, submission_lookup: dict) -> set:
codes = set()
for term_data in submission_lookup.values():
term_code = term_data.get("term")
if term_code and term_code != "N/A":
codes.add(term_code)
return codes

def _extract_codelist_codes(self, submission_lookup: dict) -> set:
codes = set()
for term_data in submission_lookup.values():
codelist_code = term_data.get("codelist")
if codelist_code:
codes.add(codelist_code)
return codes

def _extract_term_values(self, submission_lookup: dict) -> set:
codes = set()
for term_name, term_data in submission_lookup.items():
term_code = term_data.get("term")
if term_code and term_code != "N/A":
codes.add(term_name)
return codes

def _extract_preferred_terms(
self, submission_lookup: dict, ct_package_data: dict
) -> set:
codes = set()
for term_name, term_data in submission_lookup.items():
if not isinstance(term_data, dict):
continue
term_code = term_data.get("term")
if not term_code or term_code == "N/A":
continue
codelist_id = term_data.get("codelist")
if not codelist_id or codelist_id not in ct_package_data:
continue
codelist_info = ct_package_data[codelist_id]
terms = codelist_info.get("terms", [])
for term in terms:
if term.get("conceptId") == term_code:
pref_term = term.get("preferredTerm")
if pref_term:
codes.add(pref_term)
break
return codes
attributes = set(
[
node.value
for node in parse(attribute_name_map[ct_attribute]).find(
ct_package_data
)
]
)
return attributes
39 changes: 19 additions & 20 deletions cdisc_rules_engine/services/cache/cache_populator_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,26 +278,25 @@ async def _get_codelist_term_maps(self) -> List[dict]:
2. Full codelist data: Complete metadata and terms keyed by codelist ID
{
"package": "adamct-2024-03-29",
"submission_lookup": {
"GAD02PC": {"codelist": "C172334", "term": "N/A"}, # this is at codelist level
"GAD02TS": {"codelist": "C172334", "term": "C172451"}, # this is at term level
"C172334": {
"definition": "A parameter code codelist for the Generalized Anxiety Disorder - 7 Version 2 Questionnaire
(GAD-7 V2) to support the calculation of total score in ADaM.",
"extensible": False,
"name": "Generalized Anxiety Disorder - 7 Version 2 Questionnaire Parameter Code",
"preferredTerm": "CDISC ADaM Generalized Anxiety Disorder-7 Version 2 Questionnaire Parameter
Code Terminology",
"submissionValue": "GAD02PC",
"synonyms": ["Generalized Anxiety Disorder - 7 Version 2 Questionnaire Parameter Code"],
"terms": [{
"conceptId": "C172451",
"definition": "Generalized Anxiety Disorder - 7 Version 2 - Total score used for analysis.",
"preferredTerm": "Generalized Anxiety Disorder - 7 Version 2 - Total Score for Analysis",
"submissionValue": "GAD02TS",
"synonyms": ["GAD02-Total Score - Analysis"],
"extensible": False
}]
"codelists": [{
"conceptId": "C172334",
"definition": "A parameter code codelist for the Generalized Anxiety Disorder - 7 Version 2
Questionnaire (GAD-7 V2) to support the calculation of total score in ADaM.",
"extensible": False,
"name": "Generalized Anxiety Disorder - 7 Version 2 Questionnaire Parameter Code",
"preferredTerm": "CDISC ADaM Generalized Anxiety Disorder-7 Version 2 Questionnaire Parameter
Code Terminology",
"submissionValue": "GAD02PC",
"synonyms": ["Generalized Anxiety Disorder - 7 Version 2 Questionnaire Parameter Code"],
"terms": [{
"conceptId": "C172451",
"definition": "Generalized Anxiety Disorder - 7 Version 2 - Total score used for analysis.",
"preferredTerm": "Generalized Anxiety Disorder - 7 Version 2 - Total Score for Analysis",
"submissionValue": "GAD02TS",
"synonyms": ["GAD02-Total Score - Analysis"],
"extensible": False
}]
}]
}
"""
packages = self.library_service.get_all_ct_packages()
Expand Down
53 changes: 23 additions & 30 deletions cdisc_rules_engine/services/cdisc_library_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,38 +156,31 @@ def get_tig_standards(self):
def get_codelist_terms_map(self, package_version: str) -> dict:
uri = f"/mdr/ct/packages/{package_version}"
package = self._client.get_api_json(uri)
codelist_map = {"package": package_version, "submission_lookup": {}}
for codelist in package.get("codelists"):
codelist_id = codelist.get("conceptId")
codelist_map[codelist_id] = {
"definition": codelist.get("definition"),
"extensible": codelist.get("extensible", "").lower() == "true",
"name": codelist.get("name"),
"preferredTerm": codelist.get("preferredTerm"),
"submissionValue": codelist.get("submissionValue"),
"synonyms": codelist.get("synonyms", []),
"terms": [],
}
codelist_map["submission_lookup"][codelist.get("submissionValue")] = {
"codelist": codelist_id,
"term": "N/A",
}
for term in codelist.get("terms", []):
term_id = term.get("conceptId")
term_info = {
"conceptId": term_id,
"definition": term.get("definition"),
"preferredTerm": term.get("preferredTerm"),
"submissionValue": term.get("submissionValue"),
"synonyms": term.get("synonyms", []),
codelist_map = {
"package": package_version,
"codelists": [
{
"conceptId": codelist.get("conceptId"),
"definition": codelist.get("definition"),
"extensible": codelist.get("extensible", "").lower() == "true",
"name": codelist.get("name"),
"preferredTerm": codelist.get("preferredTerm"),
"submissionValue": codelist.get("submissionValue"),
"synonyms": codelist.get("synonyms", []),
"terms": [
{
"conceptId": term.get("conceptId"),
"definition": term.get("definition"),
"preferredTerm": term.get("preferredTerm"),
"submissionValue": term.get("submissionValue"),
"synonyms": term.get("synonyms", []),
}
for term in codelist.get("terms", [])
],
}
codelist_map[codelist_id]["terms"].append(term_info)
if term.get("submissionValue"):
codelist_map["submission_lookup"][term["submissionValue"]] = {
"codelist": codelist_id,
"term": term_id,
}
for codelist in package.get("codelists")
],
}
return codelist_map

def get_variable_codelists_map(
Expand Down
Loading
Loading