Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions cdisc_rules_engine/models/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,6 @@ def parse_actions(cls, actions_data: dict) -> List[dict]:

@classmethod
def parse_datasets(cls, match_key_data: List[dict]) -> List[dict]:
# Defaulting to IDVAR and IDVARVAL as relationship columns.
# May change in the future as more standard rules are written.
relationship_columns = {
"column_with_names": "IDVAR",
"column_with_values": "IDVARVAL",
}
if not match_key_data:
return None
datasets = []
Expand All @@ -206,8 +200,6 @@ def parse_datasets(cls, match_key_data: List[dict]) -> List[dict]:
],
"wildcard": data.get("Wildcard", "**"),
}
if data.get("Is_Relationship", False):
join_data["relationship_columns"] = relationship_columns
if "Join_Type" in data:
join_data["join_type"] = data.get("Join_Type")
if "Child" in data:
Expand Down
437 changes: 174 additions & 263 deletions cdisc_rules_engine/utilities/data_processor.py

Large diffs are not rendered by default.

14 changes: 4 additions & 10 deletions cdisc_rules_engine/utilities/dataset_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,20 +495,14 @@ def _merge_datasets(
dataset_preprocessor=self,
wildcard=right_dataset_domain_details.get("wildcard"),
)
elif right_dataset_domain_name == "SUPP--":
result: DatasetInterface = DataProcessor.merge_pivot_supp_dataset(
elif right_dataset_domain_name.startswith(
"SUPP"
) or right_dataset_domain_name.startswith("SQ"):
result = DataProcessor.merge_pivot_supp_dataset(
dataset_implementation=self._data_service.dataset_implementation,
left_dataset=left_dataset,
right_dataset=right_dataset,
)
elif self._rule_processor.is_relationship_dataset(right_dataset_domain_name):
result: DatasetInterface = DataProcessor.merge_relationship_datasets(
left_dataset=left_dataset,
left_dataset_match_keys=left_dataset_match_keys,
right_dataset=right_dataset,
right_dataset_match_keys=right_dataset_match_keys,
right_dataset_domain=right_dataset_domain_details,
)
else:
result: DatasetInterface = DataProcessor.merge_sdtm_datasets(
left_dataset=left_dataset,
Expand Down
16 changes: 8 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
business_rules_enhanced==1.4.8
cachetools==6.1.0
cdisc-library-client==0.1.6
click==8.1.7
dask[dataframe]==2024.6.0
dask[array]==2024.6.0
fastparquet==2024.2.0
importlib-metadata==8.5.0
jsonata-python==0.6.0
jsonpath-ng==1.6.1
jsonschema==4.18.5
lxml==5.2.1
numpy~=1.26.0
odmlib==0.1.4
openpyxl==3.1.5
pandas==2.1.4
psutil==6.1.1
pyinstaller==6.11.0
Pympler==1.1
pyreadstat==1.2.7
python-dotenv==1.0.0
pyyaml==6.0.2
redis==4.5.0
requests~=2.32.3
setuptools~=75.6.0
cachetools==6.1.0
Pympler==1.1
psutil==6.1.1
dask[dataframe]==2024.6.0
dask[array]==2024.6.0
pyreadstat==1.2.7
fastparquet==2024.2.0
lxml==5.2.1
8 changes: 2 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,10 +876,6 @@ def dataset_rule_record_in_parent_domain_equal_to() -> dict:
{
"domain_name": "SUPPEC",
"match_key": ["USUBJID"],
"relationship_columns": {
"column_with_names": "IDVAR",
"column_with_values": "IDVARVAL",
},
}
],
"conditions": ConditionCompositeFactory.get_condition_composite(
Expand All @@ -888,7 +884,7 @@ def dataset_rule_record_in_parent_domain_equal_to() -> dict:
{
"name": "get_dataset",
"operator": "equal_to",
"value": {"target": "QNAM", "comparator": "ECREASOC"},
"value": {"target": "ECREASOC", "comparator": "Some Value 1"},
},
{
"name": "get_dataset",
Expand All @@ -905,7 +901,7 @@ def dataset_rule_record_in_parent_domain_equal_to() -> dict:
}
],
"output_variables": [
"QNAM",
"ECREASOC",
"ECPRESP",
],
}
Expand Down
205 changes: 99 additions & 106 deletions tests/resources/CoreIssue747/Rule_underscores.json
Original file line number Diff line number Diff line change
@@ -1,118 +1,111 @@
{
"Authorities": [
"Authorities": [
{
"Organization": "CDISC",
"Standards": [
{
"Organization": "CDISC",
"Standards": [
{
"Name": "SDTMIG",
"References": [
{
"Citations": [
{
"Document": "IG v3.4",
"Section": "Table 3.2.1",
"Cited_Guidance": "Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different."
},
{
"Document": "IG v3.4",
"Section": "3.2.1.1",
"Cited_Guidance": "Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order. The identified keys for each dataset should be consistent with the description of the dataset structure as described in the Define-XML document."
}
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
],
"Version": "3.4"
},
"Name": "SDTMIG",
"References": [
{
"Citations": [
{
"Name": "SDTMIG",
"References": [
{
"Citations": [
{
"Document": "IG v3.2",
"Section": "Table 3.2.1|3.2.1.1",
"Cited_Guidance": "Table 3.2.1[Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different.]|3.2.1.1[Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order.]"
}
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
],
"Version": "3.2"
"Document": "IG v3.4",
"Section": "Table 3.2.1",
"Cited_Guidance": "Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different."
},
{
"Name": "SDTMIG",
"References": [
{
"Citations": [
{
"Document": "IG v3.3",
"Section": "Table 3.2.1|3.2.1.1",
"Cited_Guidance": "Table 3.2.1[Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different.]|3.2.1.1[Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order.]"
}
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
],
"Version": "3.3"
"Document": "IG v3.4",
"Section": "3.2.1.1",
"Cited_Guidance": "Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order. The identified keys for each dataset should be consistent with the description of the dataset structure as described in the Define-XML document."
}
]
}
],
"Check": {
"all": [
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
],
"Version": "3.4"
},
{
"Name": "SDTMIG",
"References": [
{
"name": "define_dataset_key_sequence",
"operator": "is_not_unique_set"
"Citations": [
{
"Document": "IG v3.2",
"Section": "Table 3.2.1|3.2.1.1",
"Cited_Guidance": "Table 3.2.1[Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different.]|3.2.1.1[Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order.]"
}
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
]
},
"Core": {
"Id": "CDISC.SDTMIG.CG0019",
"Status": "Draft",
"Version": "1"
},
"Description": "Trigger error if records are not unique as per sponsor defined key variables as documented in the define.xml",
"Executability": "Fully Executable",
"Outcome": {
"Message": "Records are not unique as per sponsor defined key variables as documented in the define.xml"
},
"Scope": {
"Classes": {
"Include": [
"ALL"
]
],
"Version": "3.2"
},
"Domains": {
"Include": [
"ALL"
]
}
},
"Sensitivity": "Record",
"Match_Datasets": [
{
"Keys": [
"USUBJID"
],
"Name": "SUPP--",
"Is_Relationship": true
"Name": "SDTMIG",
"References": [
{
"Citations": [
{
"Document": "IG v3.3",
"Section": "Table 3.2.1|3.2.1.1",
"Cited_Guidance": "Table 3.2.1[Note that the key variables shown in this table are examples only. A sponsor's actual key structure may be different.]|3.2.1.1[Since the purpose of this column is to aid reviewers in understanding the structure of a dataset, sponsors should list all of the natural keys (see definition below) for the dataset. These keys should define uniqueness for records within a dataset, and may define a record sort order.]"
}
],
"Origin": "SDTM and SDTMIG Conformance Rules",
"Version": "2.0",
"Rule_Identifier": {
"Id": "CG0019",
"Version": "1"
}
}
],
"Version": "3.3"
}
],
"Rule_Type": "Dataset Contents Check against Define XML"
]
}
],
"Check": {
"all": [
{
"name": "define_dataset_key_sequence",
"operator": "is_not_unique_set"
}
]
},
"Core": {
"Id": "CDISC.SDTMIG.CG0019",
"Status": "Draft",
"Version": "1"
},
"Description": "Trigger error if records are not unique as per sponsor defined key variables as documented in the define.xml",
"Executability": "Fully Executable",
"Outcome": {
"Message": "Records are not unique as per sponsor defined key variables as documented in the define.xml"
},
"Scope": {
"Classes": {
"Include": ["ALL"]
},
"Domains": {
"Include": ["ALL"]
}
},
"Sensitivity": "Record",
"Match_Datasets": [
{
"Keys": ["USUBJID"],
"Name": "SUPP--"
}
],
"Rule_Type": "Dataset Contents Check against Define XML"
}
Loading
Loading