diff --git a/cdisc_rules_engine/models/actions.py b/cdisc_rules_engine/models/actions.py index 8eccf107f..91367424d 100644 --- a/cdisc_rules_engine/models/actions.py +++ b/cdisc_rules_engine/models/actions.py @@ -10,6 +10,7 @@ SOURCE_ROW_NUMBER, ) from cdisc_rules_engine.enums.sensitivity import Sensitivity +from cdisc_rules_engine.enums.rule_types import RuleTypes from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata from cdisc_rules_engine.models.dataset_variable import DatasetVariable from cdisc_rules_engine.models.validation_error_container import ( @@ -89,6 +90,21 @@ def _get_target_names_from_list_values( existing.add(value) return expanded + def _get_missing_variable_message(self) -> str: + """Get appropriate message for missing variables based on rule type.""" + rule_type = self.rule.get("rule_type", "") + metadata_check_types = [ + RuleTypes.VARIABLE_METADATA_CHECK.value, + RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE.value, + RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_DEFINE_XML_AND_LIBRARY.value, + RuleTypes.VARIABLE_METADATA_CHECK_AGAINST_LIBRARY.value, + RuleTypes.DATASET_METADATA_CHECK.value, + RuleTypes.DATASET_METADATA_CHECK_AGAINST_DEFINE.value, + ] + if rule_type in metadata_check_types: + return "not available in metadata context" + return "Not in dataset" + def generate_targeted_error_object( # noqa: C901 self, targets: Iterable[str], data: pd.DataFrame, message: str ) -> ValidationErrorContainer: @@ -143,8 +159,9 @@ def generate_targeted_error_object( # noqa: C901 if self.rule.get("sensitivity") == Sensitivity.DATASET.value: # Only generate one error for rules with dataset sensitivity + missing_var_msg = self._get_missing_variable_message() missing_vars = { - target: "Not in dataset" for target in targets_not_in_dataset + target: missing_var_msg for target in targets_not_in_dataset } # Create the initial error @@ -220,6 +237,8 @@ def generate_targeted_error_object( # noqa: C901 errors_list = self._generate_errors_by_target_presence( data, targets_not_in_dataset, all_targets_missing, errors_df ) + + compare_groups = self._extract_comparison_metadata(self.rule) return ValidationErrorContainer( domain=( f"SUPP{self.dataset_metadata.rdomain}" @@ -232,6 +251,7 @@ def generate_targeted_error_object( # noqa: C901 targets=targets_list, errors=errors_list, message=message.replace("--", self.dataset_metadata.domain_cleaned or ""), + compare_groups=compare_groups, ) def _generate_errors_by_target_presence( @@ -254,14 +274,15 @@ def _generate_errors_by_target_presence( Returns: List of ValidationErrorEntity objects """ - missing_vars = {target: "Not in dataset" for target in targets_not_in_dataset} + missing_var_msg = self._get_missing_variable_message() + missing_vars = {target: missing_var_msg for target in targets_not_in_dataset} if all_targets_missing: errors_list = [] for idx, row in data.iterrows(): error = ValidationErrorEntity( value={ - target: "Not in dataset" for target in targets_not_in_dataset + target: missing_var_msg for target in targets_not_in_dataset }, dataset=self._get_dataset_name(pd.DataFrame([row])), row=int(row.get(SOURCE_ROW_NUMBER, idx + 1)), @@ -383,17 +404,16 @@ def _build_complete_error_value( errors_df, ): """Build complete error value with all components.""" + missing_var_msg = self._get_missing_variable_message() if all_targets_missing: - error_value = { - target: "Not in dataset" for target in targets_not_in_dataset - } + error_value = {target: missing_var_msg for target in targets_not_in_dataset} else: error_value = self._build_error_value_from_row(first_row_idx, errors_df) error_value = self._add_group_keys_to_error_value( error_value, group_keys, grouping_variables ) - missing_vars = {target: "Not in dataset" for target in targets_not_in_dataset} + missing_vars = {target: missing_var_msg for target in targets_not_in_dataset} if missing_vars: error_value = {**error_value, **missing_vars} @@ -509,6 +529,51 @@ def extract_target_names_from_value_level_metadata(self) -> List[str]: ordered.append(name) return ordered + def _extract_comparison_metadata(self, rule: dict) -> Optional[List[List[str]]]: + """ + Extract comparison metadata from rule's output_variables. + + Supports mixed lists with inline `compared` blocks, e.g.: + Output Variables: + - $sibling_1 + - compared: + - $child_A + - $child_B + - $child_C + + Returns: + List of comparison groups (each group is a list of variable names), + or None if no comparison groups are defined. + """ + if "_cached_compare_groups" in rule: + return rule["_cached_compare_groups"] + + output_variables = rule.get("output_variables", []) or [] + + flattened: List[str] = [] + comparison_groups: List[List[str]] = [] + + for item in output_variables: + if isinstance(item, dict) and "compared" in item: + children = item.get("compared", []) + if isinstance(children, list): + valid_children = [c for c in children if isinstance(c, str)] + flattened.extend(valid_children) + if len(valid_children) >= 2: + comparison_groups.append(valid_children) + elif isinstance(item, str): + flattened.append(item) + + result = comparison_groups if comparison_groups else None + rule["_cached_compare_groups"] = result + + if flattened: + current_vars = rule.get("output_variables", []) + if any(isinstance(item, dict) for item in current_vars): + rule["output_variables"] = flattened + + return result + @staticmethod def _sequence_exists(sequence: pd.Series, row_name: Hashable) -> bool: return ( diff --git a/cdisc_rules_engine/models/validation_error_container.py b/cdisc_rules_engine/models/validation_error_container.py index 7f0c77f9f..1988bad3c 100644 --- a/cdisc_rules_engine/models/validation_error_container.py +++ b/cdisc_rules_engine/models/validation_error_container.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from dataclasses import dataclass, field from cdisc_rules_engine.utilities.utils import get_execution_status @@ -18,13 +18,14 @@ class ValidationErrorContainer(BaseValidationEntity): message: str | None = None status: str | None = None entity: str | None = None + compare_groups: Optional[List[List[str]]] = None @property def executionStatus(self): return self.status or get_execution_status(self.errors) def to_representation(self) -> dict: - return { + result = { "executionStatus": self.executionStatus, "dataset": self.dataset, "domain": self.domain, @@ -33,3 +34,6 @@ def to_representation(self) -> dict: "errors": [error.to_representation() for error in self.errors], **({"entity": self.entity} if self.entity else {}), } + if self.compare_groups: + result["compare_groups"] = self.compare_groups + return result diff --git a/cdisc_rules_engine/services/reporting/base_report_data.py b/cdisc_rules_engine/services/reporting/base_report_data.py index 1cbaa3746..a62f1861f 100644 --- a/cdisc_rules_engine/services/reporting/base_report_data.py +++ b/cdisc_rules_engine/services/reporting/base_report_data.py @@ -47,7 +47,12 @@ def process_values(values: list[str]) -> list[str]: if value is None: processed_values.append("null") continue - value = value.strip() + if isinstance(value, str) and "\n" in value: + lines = value.split("\n") + stripped_lines = [line.rstrip() for line in lines] + value = "\n".join(stripped_lines).strip() + elif isinstance(value, str): + value = value.strip() if value == "" or value.lower() == "nan": processed_values.append("null") else: diff --git a/cdisc_rules_engine/services/reporting/excel_writer.py b/cdisc_rules_engine/services/reporting/excel_writer.py index 7979aa0b7..ec2762a36 100644 --- a/cdisc_rules_engine/services/reporting/excel_writer.py +++ b/cdisc_rules_engine/services/reporting/excel_writer.py @@ -35,9 +35,17 @@ def excel_update_worksheet(ws, rows, align_params=None, fill_empty_rows=False): ws.cell(row=row_data.row, column=2).value = row_data.value else: for col_num, col_data in enumerate(row_data.values(), 1): - ws.cell(row=row_num, column=col_num).value = stringify_list(col_data) + cell_value = stringify_list(col_data) + ws.cell(row=row_num, column=col_num).value = cell_value + if align_params: + alignment_params = align_params.copy() + else: + alignment_params = {} + if isinstance(cell_value, str) and "\n" in cell_value: + alignment_params["wrap_text"] = True + alignment_params["vertical"] = "top" ws.cell(row=row_num, column=col_num).alignment = Alignment( - **align_params + **alignment_params ) if fill_empty_rows and (row_data[1] == "" or row_data[1] is None): # Codelist is empty for Code Rows. Change background color diff --git a/cdisc_rules_engine/services/reporting/sdtm_report_data.py b/cdisc_rules_engine/services/reporting/sdtm_report_data.py index a6af212f8..bb2c22dc3 100644 --- a/cdisc_rules_engine/services/reporting/sdtm_report_data.py +++ b/cdisc_rules_engine/services/reporting/sdtm_report_data.py @@ -22,6 +22,67 @@ from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata +def _normalize_to_list(value): + """Normalize various data structures to lists.""" + if isinstance(value, (list, tuple, set)): + return list(value) + if isinstance(value, dict): + return list(value.keys()) + return [value] if value is not None else [] + + +def _flatten_to_comparable(value): + """Flatten nested structures to comparable primitives.""" + result = [] + for item in value: + if isinstance(item, (list, tuple, set)): + result.extend(_flatten_to_comparable(list(item))) + elif isinstance(item, dict): + result.extend(_flatten_to_comparable(list(item.keys()))) + else: + result.append(item) + return result + + +def _compare_data_structures(value1, value2): + """Compare two data structures and return differences (set-based).""" + if value1 is None and value2 is None: + return {"missing_in_value2": [], "extra_in_value2": [], "common": []} + if value1 is None or value2 is None: + return { + "missing_in_value2": ( + _normalize_to_list(value1) if value1 is not None else [] + ), + "extra_in_value2": _normalize_to_list(value2) if value2 is not None else [], + "common": [], + } + + set1 = set(_flatten_to_comparable(_normalize_to_list(value1))) + set2 = set(_flatten_to_comparable(_normalize_to_list(value2))) + + return { + "missing_in_value2": sorted(set1 - set2), + "extra_in_value2": sorted(set2 - set1), + "common": sorted(set1 & set2), + } + + +def _format_comparison_result(comparison, var1_name, var2_name): + """Format comparison results as human-readable string.""" + missing = comparison.get("missing_in_value2", []) + extra = comparison.get("extra_in_value2", []) + + parts = [] + if missing: + parts.append(f"Missing in {var2_name}: {', '.join(map(str, missing))}") + if extra: + parts.append(f"Extra in {var2_name}: {', '.join(map(str, extra))}") + if not parts: + parts.append("No differences found") + + return "\n".join(parts) + + class SDTMReportData(BaseReportData): """ Report details specific to SDTM @@ -29,6 +90,16 @@ class SDTMReportData(BaseReportData): TEMPLATE_FILE_PATH = DefaultFilePaths.EXCEL_TEMPLATE_FILE.value + _SPECIAL_ERROR_FIELDS = { + "USUBJID", + "dataset", + "row", + "SEQ", + "entity", + "instance_id", + "path", + } + def __init__( self, datasets: Iterable[SDTMDatasetMetadata], @@ -248,6 +319,100 @@ def get_detailed_data(self, excel=False) -> list[dict]: key=lambda x: (x["core_id"], x["dataset"]), ) + def _process_comparison_group(self, group: list, error_value: dict) -> str: + """Process a single comparison group and return formatted comparison string.""" + if len(group) < 2: + return "" + + baseline_name, baseline_value = group[0], error_value.get(group[0]) + + summary_lines = [ + ( + _format_comparison_result( + _compare_data_structures(baseline_value, other_value), + baseline_name, + other_name, + ) + if baseline_value is not None and other_value is not None + else f"{other_name}: null vs {baseline_name}: null" + ) + for other_name in group[1:] + for other_value in [error_value.get(other_name)] + ] + + raw_value_lines = [ + ( + f"{name}: {val}" + if (val := error_value.get(name)) is not None + else f"{name}: null" + ) + for name in group + ] + + return "\n".join(summary_lines + raw_value_lines) + + def _extract_value_for_variable( + self, variable: str, error: dict, error_value: dict + ) -> str | None: + """Extract value for a variable, checking special fields first.""" + if variable in self._SPECIAL_ERROR_FIELDS: + val = error.get(variable) + if val is None: + val = error_value.get(variable) + else: + val = error_value.get(variable) + return None if val is None else str(val) + + def _extract_values_from_error( + self, error: dict, error_value: dict, compare_groups: list, variables: list + ) -> list: + """Extract values from error, handling comparison groups or standard variables.""" + if not compare_groups: + return [ + self._extract_value_for_variable(v, error, error_value) + for v in variables + ] + + compare_group_vars = {var for group in compare_groups for var in group} + var_to_group = {var: group for group in compare_groups for var in group} + processed_groups = set() + + result = [] + for variable in variables: + if variable in compare_group_vars: + group = var_to_group[variable] + group_key = tuple(sorted(group)) + if group_key not in processed_groups: + result.append(self._process_comparison_group(group, error_value)) + processed_groups.add(group_key) + else: + result.append( + self._extract_value_for_variable(variable, error, error_value) + ) + + return result + + def _create_error_item( + self, + validation_result: RuleValidationResult, + result: dict, + error: dict, + variables: list, + values: list, + ) -> dict: + """Create a single error item dictionary.""" + return { + "core_id": validation_result.id, + "message": result.get("message"), + "executability": validation_result.executability, + "dataset": error.get("dataset"), + "USUBJID": error.get("USUBJID", ""), + "row": error.get("row", ""), + "SEQ": error.get("SEQ", ""), + "variables": variables, + "values": self.process_values(values), + } + def _generate_error_details( self, validation_result: RuleValidationResult, excel ) -> list[dict]: @@ -271,25 +436,36 @@ def _generate_error_details( for result in validation_result.results or []: if result.get("errors", []) and result.get("executionStatus") == "success": variables = result.get("variables", []) + compare_groups = result.get("compare_groups") or [] + for error in result.get("errors"): - values = [] - for variable in variables: - raw_value = error.get("value", {}).get(variable) - if raw_value is None: - values.append(None) - else: - values.append(str(raw_value)) - error_item = { - "core_id": validation_result.id, - "message": result.get("message"), - "executability": validation_result.executability, - "dataset": error.get("dataset"), - "USUBJID": error.get("USUBJID", ""), - "row": error.get("row", ""), - "SEQ": error.get("SEQ", ""), - "variables": variables, - "values": self.process_values(values), - } + error_value = error.get("value", {}) + values = self._extract_values_from_error( + error, error_value, compare_groups, variables + ) + if compare_groups: + compare_group_vars = { + var for group in compare_groups for var in group + } + var_to_group = { + var: group for group in compare_groups for var in group + } + processed_groups = set() + aligned_variables = [] + for variable in variables: + if variable in compare_group_vars: + group = var_to_group[variable] + group_key = tuple(sorted(group)) + if group_key not in processed_groups: + aligned_variables.append(", ".join(group)) + processed_groups.add(group_key) + else: + aligned_variables.append(variable) + else: + aligned_variables = variables + error_item = self._create_error_item( + validation_result, result, error, aligned_variables, values + ) errors.append(error_item) return errors diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py index d7c834321..ccae357a2 100644 --- a/cdisc_rules_engine/utilities/rule_processor.py +++ b/cdisc_rules_engine/utilities/rule_processor.py @@ -781,11 +781,46 @@ def extract_target_names_from_rule( pattern: ^TSVAL\d+$ (starts with TSVAL and ends with number) additional columns: TSVAL1, TSVAL2, TSVAL3 etc. """ - if rule.get("output_variables"): - return RuleProcessor._extract_targets_from_output_variables(rule, domain) - return RuleProcessor._extract_targets_from_conditions( - rule, domain, column_names - ) + output_variables = rule.get("output_variables", []) + if output_variables: + flattened_vars: List[str] = [] + for item in output_variables: + if isinstance(item, dict) and "compared" in item: + children = item.get("compared", []) + if isinstance(children, list): + flattened_vars.extend( + [c for c in children if isinstance(c, str)] + ) + elif isinstance(item, str): + flattened_vars.append(item) + + target_names: List[str] = [ + var.replace("--", domain or "", 1) for var in flattened_vars + ] + else: + target_names: List[str] = [] + conditions: ConditionInterface = rule["conditions"] + for condition in conditions.values(): + if condition.get("operator") == "not_exists": + continue + target: str = condition["value"].get("target") + if target is None: + continue + target = target.replace("--", domain or "") + op_related_pattern: str = RuleProcessor.get_operator_related_pattern( + condition.get("operator"), target + ) + if op_related_pattern is not None: + # if pattern exists -> return only matching column names + target_names.extend( + filter( + lambda name: re.match(op_related_pattern, name), + column_names, + ) + ) + else: + target_names.append(target) + return list(dict.fromkeys(target_names)) @staticmethod def extract_referenced_variables_from_rule(rule: dict): diff --git a/resources/schema/CORE-base.json b/resources/schema/CORE-base.json index 6fccb4afd..769a6b569 100644 --- a/resources/schema/CORE-base.json +++ b/resources/schema/CORE-base.json @@ -460,7 +460,25 @@ }, "Output Variables": { "items": { - "$ref": "Operator.json#/properties/name" + "oneOf": [ + { + "$ref": "Operator.json#/properties/name" + }, + { + "type": "object", + "properties": { + "compared": { + "type": "array", + "items": { + "$ref": "Operator.json#/properties/name" + }, + "minItems": 2 + } + }, + "required": ["compared"], + "additionalProperties": false + } + ] }, "type": "array" } diff --git a/resources/schema/Rule_Type.md b/resources/schema/Rule_Type.md index a3ec83245..5307c4d46 100644 --- a/resources/schema/Rule_Type.md +++ b/resources/schema/Rule_Type.md @@ -282,6 +282,47 @@ Mapping of Result property names to Report Issue Details Column Names: | instance_id | instance_id | Instance ID | | path | path | Path | +**Note:** Output variables must match their respective rule types. If an output variable is not available for a specific rule type, the report will display an appropriate message. For Variable Metadata Check rules (and their variants) and Dataset Metadata Check rules (and their variants), if an output variable is not available in the metadata context, the report will display "not available in metadata context" instead of the variable value. + +#### Output Variable Comparison + +You can use the `compared` syntax to compare multiple output variables. When using `compared`, the reporting engine will perform a set-based (order-independent) comparison between the variables and display formatted comparison results showing missing and extra items. + +**Syntax:** + +```yaml +Outcome: + Message: "Comparison error message" + Output Variables: + - $variable1 + - compared: + - $baseline_variable + - $comparison_variable1 + - $comparison_variable2 +``` + +**Behavior:** + +- The first variable in a `compared` block serves as the baseline for comparison +- Each subsequent variable is compared against the baseline using set difference logic +- Comparison results show: + - Items missing in the comparison variable (present in baseline but not in comparison) + - Items extra in the comparison variable (present in comparison but not in baseline) + - Raw values for all variables in the group +- You can have multiple `compared` blocks in a single `Output Variables` list +- Each `compared` block must contain at least 2 variables + +**Example:** + +```yaml +Outcome: + Message: "Expected variables missing from dataset" + Output Variables: + - compared: + - $dataset_variables + - $expected_variables +``` + ### Scope A JSONata rule will always run once for the entire JSON file, regardless of the Scope. The `Entity` determination must come from the rule's JSONata result property. diff --git a/tests/unit/test_actions.py b/tests/unit/test_actions.py index 0dbda6d6b..3eeaaae2f 100644 --- a/tests/unit/test_actions.py +++ b/tests/unit/test_actions.py @@ -335,6 +335,130 @@ def test_json_serializable_value(data): json.dumps(result.to_representation()) +def test_extract_comparison_metadata(): + """Test extraction of comparison metadata from output_variables.""" + dummy_rule = { + "core_id": "ComparisonTest", + "output_variables": [ + "$VAR1", + {"compared": ["$VAR2", "$VAR3", "$VAR4"]}, + "$VAR5", + ], + } + df = pd.DataFrame({"VAR1": [1], "VAR2": [2], "VAR3": [3], "VAR4": [4], "VAR5": [5]}) + variable = DatasetVariable(df) + dataset_metadata = SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, filename="ae.xpt" + ) + action = COREActions([], variable, dataset_metadata, dummy_rule) + + compare_groups = action._extract_comparison_metadata(dummy_rule) + assert compare_groups == [["$VAR2", "$VAR3", "$VAR4"]] + assert dummy_rule["output_variables"] == [ + "$VAR1", + "$VAR2", + "$VAR3", + "$VAR4", + "$VAR5", + ] + + +def test_extract_comparison_metadata_multiple_groups(): + """Test extraction with multiple compared blocks.""" + dummy_rule = { + "core_id": "ComparisonTest", + "output_variables": [ + "$VAR1", + {"compared": ["$VAR2", "$VAR3"]}, + {"compared": ["$VAR4", "$VAR5", "$VAR6"]}, + ], + } + df = pd.DataFrame( + {"VAR1": [1], "VAR2": [2], "VAR3": [3], "VAR4": [4], "VAR5": [5], "VAR6": [6]} + ) + variable = DatasetVariable(df) + dataset_metadata = SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, filename="ae.xpt" + ) + action = COREActions([], variable, dataset_metadata, dummy_rule) + + compare_groups = action._extract_comparison_metadata(dummy_rule) + assert compare_groups == [["$VAR2", "$VAR3"], ["$VAR4", "$VAR5", "$VAR6"]] + + +def test_extract_comparison_metadata_caching(): + """Test that comparison metadata is cached in rule dict.""" + dummy_rule = { + "core_id": "ComparisonTest", + "output_variables": [{"compared": ["$VAR1", "$VAR2"]}], + } + df = pd.DataFrame({"VAR1": [1], "VAR2": [2]}) + variable = DatasetVariable(df) + dataset_metadata = SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, filename="ae.xpt" + ) + action = COREActions([], variable, dataset_metadata, dummy_rule) + + compare_groups1 = action._extract_comparison_metadata(dummy_rule) + compare_groups2 = action._extract_comparison_metadata(dummy_rule) + + assert compare_groups1 == compare_groups2 == [["$VAR1", "$VAR2"]] + assert "_cached_compare_groups" in dummy_rule + + +def test_extract_comparison_metadata_no_compared(): + """Test that None is returned when no compared blocks exist.""" + dummy_rule = { + "core_id": "NoComparisonTest", + "output_variables": ["$VAR1", "$VAR2"], + } + df = pd.DataFrame({"VAR1": [1], "VAR2": [2]}) + variable = DatasetVariable(df) + dataset_metadata = SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, filename="ae.xpt" + ) + action = COREActions([], variable, dataset_metadata, dummy_rule) + + compare_groups = action._extract_comparison_metadata(dummy_rule) + assert compare_groups is None + + +def test_generate_targeted_error_object_with_compare_groups(): + """Test that compare_groups are included in ValidationErrorContainer.""" + dummy_rule = { + "core_id": "ComparisonTest", + "actions": [ + { + "name": "generate_targeted_error_objects", + "params": {"message": "Comparison test"}, + } + ], + "output_variables": [{"compared": ["$VAR1", "$VAR2"]}], + } + df = pd.DataFrame( + { + "VAR1": [1, 2], + "VAR2": [2, 3], + "USUBJID": ["SUBJ-001", "SUBJ-002"], + } + ) + df[SOURCE_FILENAME] = "test.xpt" + df[SOURCE_ROW_NUMBER] = [1, 2] + variable = DatasetVariable(df) + dataset_metadata = SDTMDatasetMetadata( + first_record={"DOMAIN": "AE"}, filename="test.xpt" + ) + action = COREActions([], variable, dataset_metadata, dummy_rule) + + result = action.generate_targeted_error_object( + {"VAR1", "VAR2"}, df, "Comparison test" + ) + assert result.compare_groups == [["$VAR1", "$VAR2"]] + representation = result.to_representation() + assert "compare_groups" in representation + assert representation["compare_groups"] == [["$VAR1", "$VAR2"]] + + def test_nan_handling_in_error_object(): dummy_rule = { "core_id": "NaNTest", diff --git a/tests/unit/test_services/test_reporting/test_sdtm_report.py b/tests/unit/test_services/test_reporting/test_sdtm_report.py index 95a9beddb..3409f7993 100644 --- a/tests/unit/test_services/test_reporting/test_sdtm_report.py +++ b/tests/unit/test_services/test_reporting/test_sdtm_report.py @@ -83,6 +83,120 @@ def test_get_detailed_data(mock_validation_results): assert error == detailed_data[i] +def test_get_detailed_data_with_compare_groups(): + """Test detailed data generation with comparison groups.""" + from cdisc_rules_engine.models.rule_validation_result import RuleValidationResult + + rule = { + "core_id": "CORE-000334", + "actions": [ + { + "name": "generate_targeted_error_objects", + "params": {"message": "Test comparison"}, + } + ], + "executability": "Fully Executable", + "authorities": [], + } + validation_result = RuleValidationResult( + rule, + [ + { + "executionStatus": "success", + "variables": ["$dataset_variables", "$expected_variables"], + "compare_groups": [["$dataset_variables", "$expected_variables"]], + "errors": [ + { + "dataset": "AE", + "USUBJID": "001", + "row": 1, + "SEQ": 1, + "value": { + "$dataset_variables": ["VAR1", "VAR2"], + "$expected_variables": ["VAR1", "VAR2", "VAR3"], + }, + } + ], + "message": "Test comparison", + } + ], + ) + + report = SDTMReportData( + [], + ["test"], + [validation_result], + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + detailed_data = report.get_detailed_data() + + assert len(detailed_data) == 1 + error = detailed_data[0] + assert error["core_id"] == "CORE-000334" + assert error["message"] == "Test comparison" + # Variables should be aligned (comma-separated list of comparison group vars) + assert error["variables"] == ["$dataset_variables, $expected_variables"] + # Values should contain comparison result string + assert len(error["values"]) == 1 + assert "Missing in" in error["values"][0] or "Extra in" in error["values"][0] + + +def test_get_detailed_data_with_multiple_compare_groups(): + """Test detailed data with multiple comparison groups.""" + from cdisc_rules_engine.models.rule_validation_result import RuleValidationResult + + rule = { + "core_id": "CORE-000335", + "actions": [ + {"name": "generate_targeted_error_objects", "params": {"message": "Test"}} + ], + "executability": "Fully Executable", + "authorities": [], + } + validation_result = RuleValidationResult( + rule, + [ + { + "executionStatus": "success", + "variables": ["$VAR1", "$VAR2", "$VAR3", "$VAR4"], + "compare_groups": [["$VAR1", "$VAR2"], ["$VAR3", "$VAR4"]], + "errors": [ + { + "dataset": "AE", + "USUBJID": "001", + "row": 1, + "SEQ": 1, + "value": { + "$VAR1": ["A", "B"], + "$VAR2": ["A"], + "$VAR3": [1, 2], + "$VAR4": [1, 2, 3], + }, + } + ], + "message": "Test", + } + ], + ) + + report = SDTMReportData( + [], + ["test"], + [validation_result], + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + detailed_data = report.get_detailed_data() + + assert len(detailed_data) == 1 + error = detailed_data[0] + # Should have 2 values (one per comparison group) + assert len(error["values"]) == 2 + # Variables should be aligned to match values count + assert len(error["variables"]) == 2 + + def test_get_summary_data(mock_validation_results): report = SDTMReportData( [], diff --git a/tests/unit/test_utilities/test_rule_processor.py b/tests/unit/test_utilities/test_rule_processor.py index 25b17a808..439cb66a4 100644 --- a/tests/unit/test_utilities/test_rule_processor.py +++ b/tests/unit/test_utilities/test_rule_processor.py @@ -1148,6 +1148,37 @@ def test_extract_target_names_from_rule_output_variables(): ] +def test_extract_target_names_from_rule_output_variables_with_compared(): + """Test extraction of target names when output_variables contains compared blocks.""" + rule: dict = { + "output_variables": [ + "$VAR1", + {"compared": ["$VAR2", "$VAR3", "$VAR4"]}, + "$VAR5", + ], + } + target_names: List[str] = RuleProcessor.extract_target_names_from_rule( + rule, "AE", ["VAR1", "VAR2", "VAR3", "VAR4", "VAR5"] + ) + assert set(target_names) == {"$VAR1", "$VAR2", "$VAR3", "$VAR4", "$VAR5"} + + +def test_extract_target_names_from_rule_output_variables_mixed_compared(): + """Test extraction with multiple compared blocks and mixed strings.""" + rule: dict = { + "output_variables": [ + "$VAR1", + {"compared": ["$VAR2", "$VAR3"]}, + "$VAR4", + {"compared": ["$VAR5", "$VAR6"]}, + ], + } + target_names: List[str] = RuleProcessor.extract_target_names_from_rule( + rule, "AE", ["VAR1", "VAR2", "VAR3", "VAR4", "VAR5", "VAR6"] + ) + assert set(target_names) == {"$VAR1", "$VAR2", "$VAR3", "$VAR4", "$VAR5", "$VAR6"} + + @pytest.mark.parametrize( "conditions", [