From 821e67ab80b668df1afaad744f0d9efe2a48f6c1 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 4 Nov 2025 09:41:25 -0500 Subject: [PATCH 1/3] attribute incorrectly named --- cdisc_rules_engine/utilities/rule_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index de335af09..836d81b49 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -394,7 +394,7 @@ def perform_rule_operations( ct_package_type=RuleProcessor._ct_package_type_api_name( operation.get("ct_package_type") ), - ct_attribute=operation.get("attribute"), + ct_attribute=operation.get("ct_attribute"), ct_package_types=[ RuleProcessor._ct_package_type_api_name(ct_package_type) for ct_package_type in operation.get("ct_package_types", []) From 88a4d2f1809c5663d07d2e673d0de707f439c42d Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Wed, 5 Nov 2025 16:56:37 -0500 Subject: [PATCH 2/3] tests, operator working --- .../check_operators/dataframe_operators.py | 6 + cdisc_rules_engine/models/operation_params.py | 1 - .../operations/get_codelist_attributes.py | 182 ++++++-- cdisc_rules_engine/rules_engine.py | 1 - .../utilities/rule_processor.py | 5 +- .../test_get_codelist_attributes.py | 402 ++++++++++++++++-- 6 files changed, 518 insertions(+), 79 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 0e8eda382..9faea2be4 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -683,6 +683,12 @@ def is_contained_by(self, other_value): elif self.is_column_of_iterables(comparison_data): results = vectorized_is_in(target_data, comparison_data) else: + if isinstance(comparison_data, pd.Series): + comparison_data = comparison_data.apply( + lambda x: list(x) if isinstance(x, set) else x + ) + elif isinstance(comparison_data, set): + comparison_data = list(comparison_data) results = target_data.isin(comparison_data) return self.value.convert_to_series(results) diff --git a/cdisc_rules_engine/models/operation_params.py b/cdisc_rules_engine/models/operation_params.py index 55907a959..3f264531f 100644 --- a/cdisc_rules_engine/models/operation_params.py +++ b/cdisc_rules_engine/models/operation_params.py @@ -36,7 +36,6 @@ class OperationParams: codelists: list = None ct_attribute: str = None ct_package_types: List[str] = None - ct_package: list = None ct_packages: list = None ct_version: str = None ct_package_type: str = None diff --git a/cdisc_rules_engine/operations/get_codelist_attributes.py b/cdisc_rules_engine/operations/get_codelist_attributes.py index bf887a504..23aea7f74 100644 --- a/cdisc_rules_engine/operations/get_codelist_attributes.py +++ b/cdisc_rules_engine/operations/get_codelist_attributes.py @@ -3,6 +3,28 @@ from cdisc_rules_engine.models.dataset import DaskDataset +def _get_ct_package_dask( + row, ct_target, ct_version, ct_packages, standard, substandard +): + if pd.isna(row[ct_version]) or str(row[ct_version]).strip() == "": + return "" + target_val = str(row[ct_target]).strip() if pd.notna(row[ct_target]) else "" + if target_val in ("CDISC", "CDISC CT"): + std = standard.lower() + if "tig" in std: + std = substandard.lower() + if "adam" in std: + prefix = "adamct" + elif "send" in std: + prefix = "sendct" + else: + prefix = "sdtmct" + pkg = f"{prefix}-{row[ct_version]}" + else: + pkg = f"{target_val}-{row[ct_version]}" + return pkg if pkg in ct_packages else "" + + class CodeListAttributes(BaseOperation): """ A class for fetching codelist attributes for a trial summary domain. @@ -42,23 +64,59 @@ def _get_codelist_attributes(self): ct_name = "CT_PACKAGE" # a column for controlled term package names # Get controlled term attribute column name specified in rule ct_attribute = self.params.ct_attribute - + ct_target = self.params.target + ct_version = self.params.ct_version + ct_packages = self.params.ct_packages + df = self.params.dataframe # 2.0 build codelist from cache # ------------------------------------------------------------------- ct_cache = self._get_ct_from_library_metadata( ct_key=ct_name, ct_val=ct_attribute ) - # 3.0 get dataset records - # ------------------------------------------------------------------- - ct_data = self._get_ct_from_dataset(ct_key=ct_name, ct_val=ct_attribute) + def get_ct_package(row): + if pd.isna(row[ct_version]) or str(row[ct_version]).strip() == "": + return "" + target_val = str(row[ct_target]).strip() if pd.notna(row[ct_target]) else "" + # Handle CDISC CT packages + if target_val in ("CDISC", "CDISC CT"): + standard = self.params.standard.lower() + if "tig" in standard: + # use substandard for relevant TIG CT + standard = self.params.standard_substandard.lower() + if "adam" in standard: + prefix = "adamct" + elif "send" in standard: + prefix = "sendct" + else: + prefix = "sdtmct" + pkg = f"{prefix}-{row[ct_version]}" + else: + # Handle external codelists + pkg = f"{target_val}-{row[ct_version]}" + return pkg if pkg in ct_packages else "" - # 4.0 merge the two datasets by CC - # ------------------------------------------------------------------- - cc_key = ct_data[ct_name] - ct_list = ct_cache[(ct_cache[ct_name].isin(cc_key))] - ds_len = self.params.dataframe.len() - result = pd.Series([ct_list[ct_attribute].values[0] for _ in range(ds_len)]) + if isinstance(df, DaskDataset): + row_packages = df.data.apply( + _get_ct_package_dask, + axis=1, + meta=(None, "object"), + args=( + ct_target, + ct_version, + ct_packages, + self.params.standard, + self.params.standard_substandard, + ), + ) + else: + row_packages = df.data.apply(get_ct_package, axis=1) + package_to_codelist = {} + for _, row in ct_cache.iterrows(): + package_to_codelist[row[ct_name]] = row[ct_attribute] + result = row_packages.apply( + lambda pkg: package_to_codelist.get(pkg, set()) if pkg else set() + ) return result def _get_ct_from_library_metadata(self, ct_key: str, ct_val: str): @@ -75,14 +133,16 @@ def _get_ct_from_library_metadata(self, ct_key: str, ct_val: str): retrieved from the cache. """ ct_packages = self.params.ct_packages - ct_term_maps = ( - [] - if ct_packages is None - else [ + ct_term_maps = [] + for package in ct_packages: + parts = package.rsplit("-", 3) + if len(parts) >= 4: + ct_package_type = parts[0] + version = "-".join(parts[1:]) + self.library_metadata._load_ct_package_data(ct_package_type, version) + ct_term_maps.append( self.library_metadata.get_ct_package_metadata(package) or {} - for package in ct_packages - ] - ) + ) # convert codelist to dataframe ct_result = {ct_key: [], ct_val: []} @@ -138,21 +198,81 @@ def _get_ct_from_dataset(self, ct_key: str, ct_val: str): return result def _add_codelist(self, ct_key, ct_val, ct_term_maps, ct_result): - """ - Adds codelist information to the result dictionary. - - Args: - ct_key (str): The key for identifying the codelist. - ct_val (str): The value associated with the codelist. - ct_term_maps (list[dict]): A list of dictionaries containing - codelist information. - ct_result (dict): The dictionary to store the codelist information. - - Returns: - dict: The updated ct_result dictionary. - """ for item in ct_term_maps: ct_result[ct_key].append(item.get("package")) - codes = set(code for code in item.keys() if code != "package") + codes = self._extract_codes_by_attribute(item, ct_val) ct_result[ct_val].append(codes) return ct_result + + def _extract_codes_by_attribute( + self, ct_package_data: dict, ct_attribute: str + ) -> set: + submission_lookup = ct_package_data.get("submission_lookup", {}) + + if ct_attribute == "Term CCODE": + return self._extract_term_codes(submission_lookup) + elif ct_attribute == "Codelist CCODE": + return self._extract_codelist_codes(submission_lookup) + elif ct_attribute in ("Term Value", "Term Submission Value"): + return self._extract_term_values(submission_lookup) + elif ct_attribute == "Codelist Value": + return self._extract_codelist_values(submission_lookup) + elif ct_attribute == "Term Preferred Term": + return self._extract_preferred_terms(submission_lookup, ct_package_data) + else: + raise ValueError(f"Unsupported ct_attribute: {ct_attribute}") + + def _extract_codelist_values(self, submission_lookup: dict) -> set: + codes = set() + for term_name, term_data in submission_lookup.items(): + term_code = term_data.get("term") + if term_code and term_code == "N/A": + codes.add(term_name) + return codes + + def _extract_term_codes(self, submission_lookup: dict) -> set: + codes = set() + for term_data in submission_lookup.values(): + term_code = term_data.get("term") + if term_code and term_code != "N/A": + codes.add(term_code) + return codes + + def _extract_codelist_codes(self, submission_lookup: dict) -> set: + codes = set() + for term_data in submission_lookup.values(): + codelist_code = term_data.get("codelist") + if codelist_code: + codes.add(codelist_code) + return codes + + def _extract_term_values(self, submission_lookup: dict) -> set: + codes = set() + for term_name, term_data in submission_lookup.items(): + term_code = term_data.get("term") + if term_code and term_code != "N/A": + codes.add(term_name) + return codes + + def _extract_preferred_terms( + self, submission_lookup: dict, ct_package_data: dict + ) -> set: + codes = set() + for term_name, term_data in submission_lookup.items(): + if not isinstance(term_data, dict): + continue + term_code = term_data.get("term") + if not term_code or term_code == "N/A": + continue + codelist_id = term_data.get("codelist") + if not codelist_id or codelist_id not in ct_package_data: + continue + codelist_info = ct_package_data[codelist_id] + terms = codelist_info.get("terms", []) + for term in terms: + if term.get("conceptId") == term_code: + pref_term = term.get("preferredTerm") + if pref_term: + codes.add(pref_term) + break + return codes diff --git a/cdisc_rules_engine/rules_engine.py b/cdisc_rules_engine/rules_engine.py index bcb3b37ab..795f8d185 100644 --- a/cdisc_rules_engine/rules_engine.py +++ b/cdisc_rules_engine/rules_engine.py @@ -92,7 +92,6 @@ def __init__( ) self.data_processor = DataProcessor(self.data_service, self.cache) self.ct_packages = kwargs.get("ct_packages", []) - self.ct_package = kwargs.get("ct_package") self.external_dictionaries = external_dictionaries self.define_xml_path: str = kwargs.get("define_xml_path") self.validate_xml: bool = kwargs.get("validate_xml") diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index 836d81b49..4751c93d2 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -390,7 +390,7 @@ def perform_rule_operations( standard_version=standard_version, standard_substandard=standard_substandard, external_dictionaries=external_dictionaries, - ct_version=operation.get("version"), + ct_version=operation.get("ct_version"), ct_package_type=RuleProcessor._ct_package_type_api_name( operation.get("ct_package_type") ), @@ -399,8 +399,7 @@ def perform_rule_operations( RuleProcessor._ct_package_type_api_name(ct_package_type) for ct_package_type in operation.get("ct_package_types", []) ], - ct_packages=kwargs.get("ct_packages"), - ct_package=kwargs.get("codelist_term_maps"), + ct_packages=operation.get("ct_packages", kwargs.get("ct_packages", [])), attribute_name=operation.get("attribute_name", ""), key_name=operation.get("key_name", ""), key_value=operation.get("key_value", ""), diff --git a/tests/unit/test_operations/test_get_codelist_attributes.py b/tests/unit/test_operations/test_get_codelist_attributes.py index bc46faafe..97eaf67e6 100644 --- a/tests/unit/test_operations/test_get_codelist_attributes.py +++ b/tests/unit/test_operations/test_get_codelist_attributes.py @@ -6,7 +6,6 @@ ) import pandas as pd import pytest -from typing import List from cdisc_rules_engine.models.operation_params import OperationParams @@ -30,15 +29,91 @@ [ { "package": "sdtmct-2020-03-27", - "C49487": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C25473": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "N": {"codelist": "C49487", "term": "C49487"}, + "Y": {"codelist": "C25473", "term": "C25473"}, + "MAYBE": {"codelist": "C141663", "term": "C141663"}, + }, + "C49487": { + "extensible": False, + "preferredTerm": "No", + "submissionValue": "N", + "terms": [ + { + "conceptId": "C49487", + "submissionValue": "N", + "preferredTerm": "No", + } + ], + }, + "C25473": { + "extensible": False, + "preferredTerm": "Yes", + "submissionValue": "Y", + "terms": [ + { + "conceptId": "C25473", + "submissionValue": "Y", + "preferredTerm": "Yes", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Maybe", + "submissionValue": "MAYBE", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "MAYBE", + "preferredTerm": "Maybe", + } + ], + }, }, { "package": "sdtmct-2022-12-16", - "C141657": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C141656": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "A": {"codelist": "C141657", "term": "C141657"}, + "B": {"codelist": "C141656", "term": "C141656"}, + "C": {"codelist": "C141663", "term": "C141663"}, + }, + "C141657": { + "extensible": False, + "preferredTerm": "Option A", + "submissionValue": "A", + "terms": [ + { + "conceptId": "C141657", + "submissionValue": "A", + "preferredTerm": "Option A", + } + ], + }, + "C141656": { + "extensible": False, + "preferredTerm": "Option B", + "submissionValue": "B", + "terms": [ + { + "conceptId": "C141656", + "submissionValue": "B", + "preferredTerm": "Option B", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Option C", + "submissionValue": "C", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "C", + "preferredTerm": "Option C", + } + ], + }, }, ], PandasDataset, @@ -64,20 +139,97 @@ [ { "package": "sdtmct-2020-03-27", - "C49487": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C25473": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "N": {"codelist": "C49487", "term": "C49487"}, + "Y": {"codelist": "C25473", "term": "C25473"}, + "MAYBE": {"codelist": "C141663", "term": "C141663"}, + }, + "C49487": { + "extensible": False, + "preferredTerm": "No", + "submissionValue": "N", + "terms": [ + { + "conceptId": "C49487", + "submissionValue": "N", + "preferredTerm": "No", + } + ], + }, + "C25473": { + "extensible": False, + "preferredTerm": "Yes", + "submissionValue": "Y", + "terms": [ + { + "conceptId": "C25473", + "submissionValue": "Y", + "preferredTerm": "Yes", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Maybe", + "submissionValue": "MAYBE", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "MAYBE", + "preferredTerm": "Maybe", + } + ], + }, }, { "package": "sdtmct-2022-12-16", - "C141657": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C141656": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "A": {"codelist": "C141657", "term": "C141657"}, + "B": {"codelist": "C141656", "term": "C141656"}, + "C": {"codelist": "C141663", "term": "C141663"}, + }, + "C141657": { + "extensible": False, + "preferredTerm": "Option A", + "submissionValue": "A", + "terms": [ + { + "conceptId": "C141657", + "submissionValue": "A", + "preferredTerm": "Option A", + } + ], + }, + "C141656": { + "extensible": False, + "preferredTerm": "Option B", + "submissionValue": "B", + "terms": [ + { + "conceptId": "C141656", + "submissionValue": "B", + "preferredTerm": "Option B", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Option C", + "submissionValue": "C", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "C", + "preferredTerm": "Option C", + } + ], + }, }, ], PandasDataset, {"C141656", "C141663", "C141657"}, ) + test_set3 = ( ["sdtmct-2020-03-27"], { @@ -91,15 +243,91 @@ [ { "package": "sdtmct-2020-03-27", - "C49487": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C25473": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "N": {"codelist": "C49487", "term": "C49487"}, + "Y": {"codelist": "C25473", "term": "C25473"}, + "MAYBE": {"codelist": "C141663", "term": "C141663"}, + }, + "C49487": { + "extensible": False, + "preferredTerm": "No", + "submissionValue": "N", + "terms": [ + { + "conceptId": "C49487", + "submissionValue": "N", + "preferredTerm": "No", + } + ], + }, + "C25473": { + "extensible": False, + "preferredTerm": "Yes", + "submissionValue": "Y", + "terms": [ + { + "conceptId": "C25473", + "submissionValue": "Y", + "preferredTerm": "Yes", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Maybe", + "submissionValue": "MAYBE", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "MAYBE", + "preferredTerm": "Maybe", + } + ], + }, }, { "package": "sdtmct-2022-12-16", - "C141657": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C141656": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "A": {"codelist": "C141657", "term": "C141657"}, + "B": {"codelist": "C141656", "term": "C141656"}, + "C": {"codelist": "C141663", "term": "C141663"}, + }, + "C141657": { + "extensible": False, + "preferredTerm": "Option A", + "submissionValue": "A", + "terms": [ + { + "conceptId": "C141657", + "submissionValue": "A", + "preferredTerm": "Option A", + } + ], + }, + "C141656": { + "extensible": False, + "preferredTerm": "Option B", + "submissionValue": "B", + "terms": [ + { + "conceptId": "C141656", + "submissionValue": "B", + "preferredTerm": "Option B", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Option C", + "submissionValue": "C", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "C", + "preferredTerm": "Option C", + } + ], + }, }, ], DaskDataset, @@ -125,15 +353,91 @@ [ { "package": "sdtmct-2020-03-27", - "C49487": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C25473": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "N": {"codelist": "C49487", "term": "C49487"}, + "Y": {"codelist": "C25473", "term": "C25473"}, + "MAYBE": {"codelist": "C141663", "term": "C141663"}, + }, + "C49487": { + "extensible": False, + "preferredTerm": "No", + "submissionValue": "N", + "terms": [ + { + "conceptId": "C49487", + "submissionValue": "N", + "preferredTerm": "No", + } + ], + }, + "C25473": { + "extensible": False, + "preferredTerm": "Yes", + "submissionValue": "Y", + "terms": [ + { + "conceptId": "C25473", + "submissionValue": "Y", + "preferredTerm": "Yes", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Maybe", + "submissionValue": "MAYBE", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "MAYBE", + "preferredTerm": "Maybe", + } + ], + }, }, { "package": "sdtmct-2022-12-16", - "C141657": {"extensible": False, "allowed_terms": ["A", "B", "C"]}, - "C141656": {"extensible": False, "allowed_terms": ["X", "Y", "Z"]}, - "C141663": {"extensible": False, "allowed_terms": []}, + "submission_lookup": { + "A": {"codelist": "C141657", "term": "C141657"}, + "B": {"codelist": "C141656", "term": "C141656"}, + "C": {"codelist": "C141663", "term": "C141663"}, + }, + "C141657": { + "extensible": False, + "preferredTerm": "Option A", + "submissionValue": "A", + "terms": [ + { + "conceptId": "C141657", + "submissionValue": "A", + "preferredTerm": "Option A", + } + ], + }, + "C141656": { + "extensible": False, + "preferredTerm": "Option B", + "submissionValue": "B", + "terms": [ + { + "conceptId": "C141656", + "submissionValue": "B", + "preferredTerm": "Option B", + } + ], + }, + "C141663": { + "extensible": False, + "preferredTerm": "Option C", + "submissionValue": "C", + "terms": [ + { + "conceptId": "C141663", + "submissionValue": "C", + "preferredTerm": "Option C", + } + ], + }, }, ], DaskDataset, @@ -154,18 +458,18 @@ def test_get_codelist_attributes( ct_list, ): """ - Unit test for DataProcessor.get_column_order_from_library. - Mocks cache call to return metadata. + Unit test for CodeListAttributes operation. + Tests that the operation returns the correct term codes based on CT version. """ # 1.0 set parameters operation_params.dataframe = dataset_type.from_dict(ts_data) operation_params.domain = "TS" operation_params.standard = "sdtmig" operation_params.standard_version = "3-4" - operation_params.ct_attribute: str = "TSVALCD" - operation_params.ct_version: str = "TSVCDVER" + operation_params.ct_attribute = "Term CCODE" # Changed from TSVALCD + operation_params.ct_version = "TSVCDVER" operation_params.target = "TSVCDREF" - operation_params.ct_packages: list = ct_packages + operation_params.ct_packages = ct_packages # 2.0 add CT data to cache cache = InMemoryCacheService.get_instance() @@ -187,16 +491,28 @@ def test_get_codelist_attributes( library_metadata, ) - result: pd.DataFrame = operation.execute() - - variables: List[str] = ct_list - expected: pd.Series = pd.Series( - [ - variables, - variables, - variables, - variables, - variables, - ] - ) - assert result[operation_params.operation_id].equals(expected) + result = operation.execute() + + # Extract the operation_id column which contains the sets + result_series = result[operation_params.operation_id] + + # Expected: Each row gets the ct_list only if its version matches ct_packages + # For test_set1 and test_set3: All rows with version 2020-03-27 should get ct_list + # For test_set2 and test_set4: Only rows 3 and 4 with version 2022-12-16 should get ct_list + + if ct_packages == ["sdtmct-2020-03-27"]: + # Rows 0, 1, 2 have version 2020-03-27 (match) + # Rows 3, 4 have empty version (no match) + expected = pd.Series([ct_list, ct_list, ct_list, set(), set()]) + else: # ct_packages == ["sdtmct-2022-12-16"] + # Rows 0, 1, 2 have version 2020-03-27 (no match) + # Rows 3, 4 have version 2022-12-16 (match) + expected = pd.Series([set(), set(), set(), ct_list, ct_list]) + + # Compare the series - each element should already be a set + assert len(result_series) == len(expected) + for i in range(len(result_series)): + # Both result_series.iloc[i] and expected.iloc[i] should be sets already + assert ( + result_series.iloc[i] == expected.iloc[i] + ), f"Row {i}: {result_series.iloc[i]} != {expected.iloc[i]}" From 7c40c1b66dc45bcc29723d4f61963fc54f00dddb Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Wed, 5 Nov 2025 20:07:36 -0500 Subject: [PATCH 3/3] docs --- resources/schema/Operations.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/resources/schema/Operations.md b/resources/schema/Operations.md index 238697c32..e6837a381 100644 --- a/resources/schema/Operations.md +++ b/resources/schema/Operations.md @@ -152,16 +152,25 @@ Returns a list of valid extensible codelist term's submission values. Used for e ### get_codelist_attributes -Fetches attribute values for a codelist specified in a dataset (like TS) +Fetches controlled terminology attribute values from CT packages based on row-specific CT package and version references. + +**Required Parameters:** + +- `ct_attribute`: Attribute to extract - `"Term CCODE"`, `"Codelist CCODE"`, `"Term Value"`, `"Codelist Value"`, or `"Term Preferred Term"` +- `target`: Column containing CT reference (e.g., "TSVCDREF") +- `ct_version`: Column containing CT version (e.g., "TSVCDVER") +- `ct_packages`: List of CT packages to search (e.g., `["sdtmct-2020-03-27"]`) ```yaml -- id: $TERM_CCODES +- id: $VALID_TERM_CODES name: TSVCDREF operator: get_codelist_attributes ct_attribute: Term CCODE ct_version: TSVCDVER + target: TSVCDREF ct_packages: - sdtmct-2020-03-27 + - sdtmct-2022-12-16 ``` ### valid_codelist_dates