Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions cdisc_rules_engine/check_operators/dataframe_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,7 @@ def is_complete_date(self, other_value):
def is_inconsistent_across_dataset(self, other_value):
target = other_value.get("target")
comparator = other_value.get("comparator")
regex = other_value.get("regex")
grouping_cols = []
if isinstance(comparator, str):
if comparator in self.value.columns:
Expand All @@ -1162,6 +1163,12 @@ def is_inconsistent_across_dataset(self, other_value):
grouping_cols.append(col)
df_check = self.value[grouping_cols + [target]].copy()
df_check = df_check.fillna("_NaN_")
if regex:
pattern = re.compile(regex)
if pattern.groups == 0:
regex = f"({regex})"
extracted = df_check[target].astype(str).str.extract(regex, expand=False)
df_check[target] = extracted.fillna(df_check[target])
results = pd.Series(False, index=df_check.index)
for name, group in df_check.groupby(grouping_cols, dropna=False):
if group[target].nunique() > 1:
Expand Down
8 changes: 8 additions & 0 deletions resources/schema/rule-merged/MetaVariables.json
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,14 @@
"const": "library_variable_core",
"markdownDescription": "\ncore attribute of a variable from the CDISC Library\n"
},
{
"const": "library_variable_has_codelist",
"markdownDescription": "\nIndicates whether a variable has an associated codelist in the CDISC Library\n"
},
{
"const": "library_variable_ccode",
"markdownDescription": "\nccode attribute of a variable from the CDISC Library\n"
},
{
"const": "library_variable_data_type",
"markdownDescription": "\nsimpleDatatype attribute of a variable from the CDISC Library\n"
Expand Down
2 changes: 1 addition & 1 deletion resources/schema/rule-merged/Operator.json
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@
"properties": {
"operator": {
"const": "is_inconsistent_across_dataset",
"markdownDescription": "\nChecks if a variable maintains consistent values within groups defined by one or more grouping variables. Groups records by specified value(s) and validates that the target variable maintains the same value within each unique combination of grouping variables. When inconsistency is detected within a group, the operator attempts to identify a majority value. If one value appears more frequently than all others, only the minority records (those not matching the majority value) are flagged. If no single majority exists \u2014 i.e., two or more values are tied for the highest frequency \u2014 all records in that group are flagged.\n\nSingle grouping variable - true if the values of BGSTRESU differ within USUBJID:\n\n```yaml\n- name: \"BGSTRESU\"\n operator: is_inconsistent_across_dataset\n value: \"USUBJID\"\n```\n\nMultiple grouping variables - true if the values of --STRESU differ within each combination of --TESTCD, --CAT, --SCAT, --SPEC, and --METHOD:\n\n```yaml\n- name: \"--STRESU\"\n operator: is_inconsistent_across_dataset\n value:\n - \"--TESTCD\"\n - \"--CAT\"\n - \"--SCAT\"\n - \"--SPEC\"\n - \"--METHOD\"\n```\n"
"markdownDescription": "\nChecks if a variable maintains consistent values within groups defined by one or more grouping variables. Groups records by specified value(s) and validates that the target variable maintains the same value within each unique combination of grouping variables. When inconsistency is detected within a group, the operator attempts to identify a majority value. If one value appears more frequently than all others, only the minority records (those not matching the majority value) are flagged. If no single majority exists \u2014 i.e., two or more values are tied for the highest frequency \u2014 all records in that group are flagged.\n\nSingle grouping variable - true if the values of BGSTRESU differ within USUBJID:\n\nIf a regex parameter is provided, it is applied to the values of the target variable before the consistency check. The first capture group of the regex is used as the normalized value for comparison. This can be useful when only part of the value should be considered during comparison (for example, comparing only the date portion of a datetime value).\n\n- regex is optional.\n- If the pattern contains no capture group, the whole pattern is wrapped in one and the entire match is used for comparison.\n- Only the first capture group is used for comparison.\n- If the pattern does not match a value, the original value is used.\n\n```yaml\n- name: \"BGSTRESU\"\n operator: is_inconsistent_across_dataset\n value: \"USUBJID\"\n```\n\nMultiple grouping variables - true if the values of --STRESU differ within each combination of --TESTCD, --CAT, --SCAT, --SPEC, and --METHOD:\n\n```yaml\n- name: \"--STRESU\"\n operator: is_inconsistent_across_dataset\n value:\n - \"--TESTCD\"\n - \"--CAT\"\n - \"--SCAT\"\n - \"--SPEC\"\n - \"--METHOD\"\n```\n"
}
},
"required": ["operator", "value"],
Expand Down
2 changes: 1 addition & 1 deletion resources/schema/rule-merged/Rule_Type.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
{
"const": "Define Item Metadata Check against Library Metadata",
"title": "Define xml metadata at variable level and corresponding library variable metadata",
"markdownDescription": "\n#### Columns\n\n- `define_variable_name`\n- `define_variable_label`\n- `define_variable_data_type`\n- `define_variable_role`\n- `define_variable_size`\n- `define_variable_ccode`\n- `define_variable_format`\n- `define_variable_allowed_terms`\n- `define_variable_origin_type`\n- `define_variable_is_collected`\n- `define_variable_has_no_data`\n- `define_variable_order_number`\n- `define_variable_has_codelist`\n- `define_variable_codelist_coded_values`\n- `define_variable_codelist_coded_codes`\n- `define_variable_mandatory`\n- `define_variable_has_comment`\n- `library_variable_name`\n- `library_variable_order_number`\n- `library_variable_label`\n- `library_variable_data_type`\n- `library_variable_role`\n- `library_variable_core`\n- `library_variable_ccode`\n\n#### Rule Macro\n\nChecks variable-level metadata, codelists, and codelist terms from Define-XML against the corresponding standard variable definitions from the CDISC Library.\n"
"markdownDescription": "\n#### Columns\n\n- `define_variable_name`\n- `define_variable_label`\n- `define_variable_data_type`\n- `define_variable_role`\n- `define_variable_size`\n- `define_variable_ccode`\n- `define_variable_format`\n- `define_variable_allowed_terms`\n- `define_variable_origin_type`\n- `define_variable_is_collected`\n- `define_variable_has_no_data`\n- `define_variable_order_number`\n- `define_variable_has_codelist`\n- `define_variable_codelist_coded_values`\n- `define_variable_codelist_coded_codes`\n- `define_variable_mandatory`\n- `define_variable_has_comment`\n- `library_variable_name`\n- `library_variable_order_number`\n- `library_variable_label`\n- `library_variable_data_type`\n- `library_variable_role`\n- `library_variable_core`\n- `library_variable_has_codelist`\n- `library_variable_ccode`\n\n#### Rule Macro\n\nChecks variable-level metadata, codelists, and codelist terms from Define-XML against the corresponding standard variable definitions from the CDISC Library.\n"
},
{
"const": "Domain Presence Check",
Expand Down
7 changes: 7 additions & 0 deletions resources/schema/rule/Operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,13 @@ Checks if a variable maintains consistent values within groups defined by one or

Single grouping variable - true if the values of BGSTRESU differ within USUBJID:

If a regex parameter is provided, it is applied to the values of the target variable before the consistency check. The first capture group of the regex is used as the normalized value for comparison. This can be useful when only part of the value should be considered during comparison (for example, comparing only the date portion of a datetime value).

- regex is optional.
- If the pattern contains no capture group, the whole pattern is wrapped in one and the entire match is used for comparison.
- Only the first capture group is used for comparison.
- If the pattern does not match a value, the original value is used.

```yaml
- name: "BGSTRESU"
operator: is_inconsistent_across_dataset
Expand Down
57 changes: 57 additions & 0 deletions tests/unit/test_check_operators/test_value_set_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,63 @@ def test_is_inconsistent_across_dataset(
assert result.equals(df.convert_to_series(expected_result))


@pytest.mark.parametrize(
    "values,regex,expected",
    [
        # No regex supplied (None or empty string).
        (["A", "B"], None, [True, True]),
        (["A", "B"], "", [True, True]),
        # Captured portion is identical for both rows.
        (["TEST_v1", "TEST_v2"], r"^(TEST)", [False, False]),
        (["ABC123", "XYZ123"], r"(\d+)", [False, False]),
        (["HEIGHT_cm", "HEIGHT_mm"], r"^(HEIGHT)", [False, False]),
        # Datetimes compared on the date portion only: same date.
        (
            ["2014-09-30T11:09", "2014-09-30T11:07"],
            r"^(\d{4}-\d{2}-\d{2})",
            [False, False],
        ),
        # Captured portion still differs between the rows.
        (["TEST_A", "TEST_B"], r"^(TEST_[A-Z])", [True, True]),
        (["SUBJ-001", "SUBJ-002"], r"SUBJ-(\d+)", [True, True]),
        (
            ["2014-09-30T11:09", "2014-09-29T11:07"],
            r"^(\d{4}-\d{2}-\d{2})",
            [True, True],
        ),
        # Pattern written without any capture group.
        (["ABC", "DEF"], r"^XYZ", [True, True]),
        # Pattern matches only one of the two values.
        (["TEST_v1", "CONTROL"], r"^(TEST)", [True, True]),
        # Catch-all pattern.
        (["A", "B"], r"(.*)", [True, True]),
        # Missing and non-string values.
        (["A", None], r"(A)", [True, True]),
        ([None, None], r"(.*)", [False, False]),
        ([1, 1], r"(\d+)", [False, False]),
    ],
)
def test_is_inconsistent_across_dataset_regex(values, regex, expected):
    """Exercise is_inconsistent_across_dataset with the optional regex key.

    Every row shares the same VISIT and EPOCH, so all values fall into a
    single group; ``expected`` is the per-row flag list the operator must
    return for VALUE.
    """
    row_count = len(values)
    frame = pd.DataFrame(
        {
            "VISIT": ["WEEK1"] * row_count,
            "EPOCH": ["TREATMENT"] * row_count,
            "VALUE": values,
        }
    )
    operator = DataframeType({"value": frame})

    outcome = operator.is_inconsistent_across_dataset(
        {
            "target": "VALUE",
            "comparator": ["VISIT", "EPOCH"],
            "regex": regex,
        }
    )

    assert outcome.tolist() == expected


@pytest.mark.parametrize(
"target, comparator, dataset_type, expected_result",
[
Expand Down
Loading