Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions cdisc_rules_engine/utilities/data_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
DataServiceFactory,
DummyDataService,
)
from cdisc_rules_engine.exceptions.custom_exceptions import PreprocessingError
from cdisc_rules_engine.utilities.utils import (
search_in_list_of_dicts,
)
Expand Down Expand Up @@ -211,6 +212,9 @@ def merge_pivot_supp_dataset(
for key in static_keys
if key in left_dataset.columns and key in right_dataset.columns
]
DataProcessor._validate_merge_key_overlap(
left_dataset, right_dataset, common_keys
)
if not is_blank:
common_keys.append(dynamic_key)
current_supp = right_dataset.rename(columns={"IDVARVAL": dynamic_key})
Expand Down Expand Up @@ -287,6 +291,9 @@ def _merge_supp_with_multiple_idvars(
for key in static_keys
if key in result_dataset.columns and key in group_data.columns
]
DataProcessor._validate_merge_key_overlap(
result_dataset, group_data, common_keys
)
common_keys.append(idvar_value)

agg_dict = {
Expand Down Expand Up @@ -480,3 +487,19 @@ def column_metadata_equal_to_define_and_library(
@staticmethod
def is_dummy_data(data_service: DataServiceInterface) -> bool:
    """Report whether ``data_service`` is a ``DummyDataService`` instance.

    :param data_service: the data service object to inspect.
    :return: ``True`` when the object is a ``DummyDataService`` (or a
        subclass of it), ``False`` otherwise.
    """
    service_is_dummy: bool = isinstance(data_service, DummyDataService)
    return service_is_dummy

@staticmethod
def _validate_merge_key_overlap(
    left_dataset: DatasetInterface,
    right_dataset: DatasetInterface,
    common_keys: List[str],
    max_values_in_message: int = 10,
) -> None:
    """Fail fast when a merge key can never match between two datasets.

    For every key in ``common_keys`` the distinct non-null values of that
    column are collected from both datasets. If both sides have at least
    one value but share none of them, a merge on that key cannot match any
    row, so a ``PreprocessingError`` is raised instead of letting the merge
    continue.

    A side with no non-null values for a key is not treated as an error;
    the check is skipped for that key.

    :param left_dataset: parent dataset (left side of the merge).
    :param right_dataset: SUPP dataset (right side of the merge).
    :param common_keys: column names present in both datasets that the
        merge will use.
    :param max_values_in_message: upper bound on how many distinct values
        per side are listed in the error message. Keeps the message
        readable when a key has many distinct values.
    :raises PreprocessingError: when a key has values on both sides but no
        value in common.
    """
    limit = max(max_values_in_message, 0)

    def _preview(values: set) -> str:
        # Sort by string form so the message is deterministic (set order is
        # not) and mixed-type values do not break the sort.
        ordered = sorted(values, key=str)
        parts = [repr(value) for value in ordered[:limit]]
        if len(ordered) > limit:
            parts.append(f"... (+{len(ordered) - limit} more)")
        return "{" + ", ".join(parts) + "}"

    for key in common_keys:
        left_values = set(left_dataset[key].dropna().unique())
        right_values = set(right_dataset[key].dropna().unique())
        if not left_values or not right_values:
            # Nothing to compare on at least one side.
            continue
        if left_values.isdisjoint(right_values):
            raise PreprocessingError(
                f"SUPP merge key '{key}' has no overlapping values between "
                f"parent dataset and SUPP dataset. "
                f"Parent values: {_preview(left_values)}, "
                f"SUPP values: {_preview(right_values)}."
            )
65 changes: 64 additions & 1 deletion tests/unit/test_dataset_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from cdisc_rules_engine.models.library_metadata_container import (
LibraryMetadataContainer,
)

from cdisc_rules_engine.exceptions.custom_exceptions import PreprocessingError
from cdisc_rules_engine.models.dataset import PandasDataset


Expand Down Expand Up @@ -1291,6 +1291,69 @@ def test_dm_merged_with_suppdm_without_dupes(
assert result.data.loc[0, ["RACE1", "RACE2", "RACE3"]].notna().all()


@patch("cdisc_rules_engine.services.data_services.LocalDataService.get_dataset")
def test_preprocess_suppae_mismatched_studyid_raises_key_error(mock_get_dataset):
    """Preprocessing AE with a SUPPAE from another study must fail loudly.

    The parent AE record and the SUPPAE record carry different STUDYID
    values, so the SUPP merge key has nothing in common. Despite the
    "key_error" suffix in the test name, the expected failure is a
    PreprocessingError naming STUDYID.
    """
    # Parent AE dataset: a single record of study CDISC-PILOT-01.
    parent_frame = pd.DataFrame(
        {
            "STUDYID": ["CDISC-PILOT-01"],
            "DOMAIN": ["AE"],
            "USUBJID": ["S001"],
            "AESEQ": [1],
            "AETERM": ["Headache"],
        }
    )
    # SUPPAE dataset: same subject, but a STUDYID that cannot match.
    supp_frame = pd.DataFrame(
        {
            "STUDYID": ["COMPLETELY-DIFFERENT-STUDY"],
            "RDOMAIN": ["AE"],
            "USUBJID": ["S001"],
            "IDVAR": ["AESEQ"],
            "IDVARVAL": ["1"],
            "QNAM": ["TEST"],
            "QLABEL": ["Test"],
            "QVAL": ["A"],
        }
    )
    ae_dataset = PandasDataset(parent_frame)
    suppae_dataset = PandasDataset(supp_frame)
    # The patched data service hands back the SUPPAE dataset on lookup.
    mock_get_dataset.return_value = suppae_dataset

    conditions = ConditionCompositeFactory.get_condition_composite(
        {
            "all": [
                {
                    "name": "get_dataset",
                    "operator": "equal_to",
                    "value": {"target": "TEST", "comparator": "A"},
                }
            ]
        }
    )
    rule = {
        "core_id": "TestRule",
        "datasets": [{"domain_name": "SUPPAE", "match_key": ["USUBJID"]}],
        "conditions": conditions,
    }
    supp_metadata = SDTMDatasetMetadata(
        name="SUPPAE", first_record={"RDOMAIN": "AE"}, filename="suppae.xpt"
    )
    datasets = [supp_metadata]

    data_service = LocalDataService(MagicMock(), MagicMock(), MagicMock())
    ae_metadata = SDTMDatasetMetadata(first_record={"DOMAIN": "AE"}, full_path="path")
    preprocessor = DatasetPreprocessor(
        ae_dataset,
        ae_metadata,
        data_service,
        InMemoryCacheService(),
    )

    expected_message = "SUPP merge key 'STUDYID' has no overlapping values"
    with pytest.raises(PreprocessingError, match=expected_message):
        preprocessor.preprocess(rule, datasets)


def test_relrec_processed_correctly_with_others(rule_with_specific_supp):
ec_meta = SDTMDatasetMetadata(
name="EC",
Expand Down
Loading