Skip to content
Merged

Fb0405 #1615

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ def build(self):
records = []
for define_item in all_define_metadata:
domain_name = define_item.get("define_dataset_name", "")
filename = domain_files.get(domain_name)
record = {
"domain": domain_name,
"filename": domain_files.get(domain_name),
**define_item,
"domain": domain_name if filename is not None else None,
"filename": filename,
}
records.append(record)

return self.dataset_implementation.from_records(records)
1 change: 1 addition & 0 deletions cdisc_rules_engine/enums/domain_presence_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
class DomainPresenceValues(BaseEnum):
    """Sentinel values used when reporting domain-presence rule results.

    Domain-presence checks run across the whole study rather than against a
    single dataset, so the usual per-dataset/per-row fields in an error
    object have no natural value; these constants fill them in.
    """

    # Placeholder dataset label: the check is study-wide, not tied to one
    # dataset file (assigned to error_object.dataset / error.dataset).
    DATASET = "STUDY"
    # Row indicator left blank: no specific record can be pointed to
    # (assigned to error.row).
    RECORD = ""
    # Domain label when no single domain applies (assigned to
    # error_object.domain for domain-presence rule types).
    DOMAIN = "N/A"
1 change: 1 addition & 0 deletions cdisc_rules_engine/enums/sensitivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ class Sensitivity(BaseEnum):
DATASET = "Dataset"
RECORD = "Record"
GROUP = "Group"
STUDY = "Study"
5 changes: 5 additions & 0 deletions cdisc_rules_engine/models/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def generate_dataset_error_objects(self, message: str, results: pd.Series):
)
if "domain presence" in self.rule.get("rule_type", "").lower():
error_object.dataset = DomainPresenceValues.DATASET.value
error_object.domain = DomainPresenceValues.DOMAIN.value
for error in error_object.errors:
error.dataset = DomainPresenceValues.DATASET.value
error.row = DomainPresenceValues.RECORD.value
Expand Down Expand Up @@ -177,6 +178,10 @@ def generate_targeted_error_object( # noqa: C901
errors_list = self._generate_errors_by_target_presence(
data, targets_not_in_dataset, all_targets_missing, errors_df
)
elif self.rule.get("sensitivity") == Sensitivity.STUDY.value:
errors_list = self._generate_errors_by_target_presence(
data, targets_not_in_dataset, all_targets_missing, errors_df
)
elif self.rule.get("sensitivity") == Sensitivity.GROUP.value:
grouping_variables = self.rule.get("grouping_variables", [])

Expand Down
18 changes: 17 additions & 1 deletion cdisc_rules_engine/rules_engine.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from copy import deepcopy
from typing import Iterable, List, Union
from dateutil.parser._parser import ParserError
import traceback

from business_rules import export_rule_data
from business_rules.engine import run
from cdisc_rules_engine.config import config as default_config
Expand Down Expand Up @@ -58,7 +60,7 @@
ExternalDictionariesContainer,
)
from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata
import traceback
from cdisc_rules_engine.enums.sensitivity import Sensitivity


class RulesEngine:
Expand Down Expand Up @@ -174,6 +176,8 @@ def validate_single_rule(self, rule: dict, datasets: Iterable[SDTMDatasetMetadat
)
if limit_reached:
break
if rule.get("sensitivity") == Sensitivity.STUDY.value:
results = self._collapse_to_study_result(results)
return results

def _update_total_errors_and_check_limit(
Expand Down Expand Up @@ -671,3 +675,15 @@ def handle_validation_exceptions( # noqa
message=message,
status=ExecutionStatus.EXECUTION_ERROR.value,
)

def _collapse_to_study_result(self, results: dict) -> dict:
"""
For study sensitivity rules, collapse all per-dataset results into a single
study-level result using the first non-skipped result as the representative.
"""
for key, dataset_results in results.items():
for result in dataset_results:
if result.get("executionStatus") != ExecutionStatus.SKIPPED.value:
return {"study": [result]}
first_key = next(iter(results))
return {"study": results[first_key]}
2 changes: 1 addition & 1 deletion resources/schema/rule/Rule_Type.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ all:

One row per dataset defined in Define-XML:

- `domain`
- `domain` - The domain if the dataset exists, null otherwise
- `filename` - The file name if dataset exists, null otherwise
- `define_dataset_name`
- `define_dataset_label`
Expand Down
3 changes: 3 additions & 0 deletions resources/schema/rule/Sensitivity.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
},
{
"const": "Group"
},
{
"const": "Study"
}
],
"markdownDescription": "Determines what level of granularity issues should be generated within the report"
Expand Down
4 changes: 4 additions & 0 deletions resources/schema/rule/Sensitivity.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## Study

Will report once per submitted study. Lends itself to Domain Presence Check rule types, as these are study-level checks that span datasets rather than checks within a single dataset. Reports one result per failing row in the dataset generated by the rule type, collapsed into a single study-level result.

## Dataset

Report one result per dataset generated by the `Rule Type`, where a dataset or record within the dataset matches the rule failure criteria
Expand Down
Binary file modified resources/templates/report-template.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@
],
},
{
"domain": "SE",
"domain": None,
"filename": None,
"define_dataset_name": "SE",
"define_dataset_label": "Subject Elements",
Expand Down Expand Up @@ -241,7 +241,7 @@
pd.DataFrame(
[
{
"domain": "AE",
"domain": None,
"filename": None,
"define_dataset_name": "AE",
"define_dataset_label": "Adverse Events",
Expand All @@ -260,7 +260,7 @@
],
},
{
"domain": "DM",
"domain": None,
"filename": None,
"define_dataset_name": "DM",
"define_dataset_label": "Demographics",
Expand All @@ -279,7 +279,7 @@
],
},
{
"domain": "SE",
"domain": None,
"filename": None,
"define_dataset_name": "SE",
"define_dataset_label": "Subject Elements",
Expand All @@ -293,7 +293,7 @@
"define_dataset_variables": ["STUDYID", "USUBJID", "SESEQ"],
},
{
"domain": "EC",
"domain": None,
"filename": None,
"define_dataset_name": "EC",
"define_dataset_label": "Exposure as Collected",
Expand Down Expand Up @@ -344,7 +344,11 @@ def test_domain_list_with_define_dataset_builder(
if expected_results.empty:
assert result_df.empty, f"Expected empty DataFrame for {test_description}"
else:
assert list(result_df.columns) == list(
assert set(result_df.columns) == set(
expected_results.columns
), f"Columns do not match for {test_description}"
pd.testing.assert_frame_equal(result_df, expected_results, check_dtype=False)
pd.testing.assert_frame_equal(
result_df.reindex(columns=expected_results.columns),
expected_results,
check_dtype=False,
)
2 changes: 1 addition & 1 deletion tests/unit/test_rules_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ def test_rule_with_domain_prefix_replacement(mock_get_dataset: MagicMock):
{
"executionStatus": ExecutionStatus.ISSUE_REPORTED.value,
"dataset": "STUDY",
"domain": "AE",
"domain": "N/A",
"variables": ["AE"],
"message": "Domain AE exists",
"errors": [
Expand Down
Loading