From 08d141ed4300408e88ada912b0aef71ca731f632 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 24 Feb 2026 18:12:32 -0500 Subject: [PATCH 1/6] operator rework --- .../check_operators/dataframe_operators.py | 180 ++++++++++++------ resources/schema/rule/Operator.json | 4 + resources/schema/rule/Operator.md | 3 +- .../test_relationship_integrity_checks.py | 59 ++++-- 4 files changed, 178 insertions(+), 68 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index a0a9dd18d..3282f78b9 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1640,51 +1640,132 @@ def value_has_multiple_references(self, other_value: dict): def value_does_not_have_multiple_references(self, other_value: dict): return ~self.value_has_multiple_references(other_value) - def check_target_ascending_in_sorted_group(self, group, target, comparator): + def _mark_invalid_null_positions(self, is_valid, group, null_mask, na_pos): + null_indices = group[null_mask].index.tolist() + non_null_indices = group[~null_mask].index.tolist() + index_order = group.index.tolist() + + if not null_indices or not non_null_indices: + return is_valid + + if na_pos == "last": + last_non_null = max(index_order.index(i) for i in non_null_indices) + first_null = min(index_order.index(i) for i in null_indices) + if first_null < last_non_null: + is_valid[null_mask] = False + else: # first + last_null = max(index_order.index(i) for i in null_indices) + first_non_null = min(index_order.index(i) for i in non_null_indices) + if last_null > first_non_null: + is_valid[null_mask] = False + + return is_valid + + def _verify_neighbor_consistency( + self, + is_valid, + non_null_rows, + target, + comparator, + ascending, + is_numeric_comparator, + ): + indices = non_null_rows.index.tolist() + comparator_vals = non_null_rows[comparator].tolist() + + for i, 
idx in enumerate(indices): + if not is_valid.loc[idx]: + continue + + curr = comparator_vals[i] + if self._is_null_or_empty(curr): + continue + + prev = next( + ( + comparator_vals[j] + for j in range(i - 1, -1, -1) + if not self._is_null_or_empty(comparator_vals[j]) + ), + None, + ) + nxt = next( + ( + comparator_vals[j] + for j in range(i + 1, len(comparator_vals)) + if not self._is_null_or_empty(comparator_vals[j]) + ), + None, + ) + + if not is_numeric_comparator and not is_valid_date(str(curr)): + continue + + if ascending: + if prev is not None and curr < prev: + is_valid.loc[idx] = False + elif nxt is not None and curr > nxt: + is_valid.loc[idx] = False + else: + if prev is not None and curr > prev: + is_valid.loc[idx] = False + elif nxt is not None and curr < nxt: + is_valid.loc[idx] = False + + return is_valid + + def check_target_ascending_in_sorted_group( + self, group, target, comparator, ascending, na_pos + ): """ Check if target values are in ascending order within a group already sorted by comparator. 
- - Null comparator or null target: mark that row as False - - Only check ascending order between rows where both are non-null """ is_valid = pd.Series(True, index=group.index) - target_values = group[target].tolist() - comparator_values = group[comparator].tolist() - is_numeric_target = pd.api.types.is_numeric_dtype(group[target]) + is_numeric_comparator = pd.api.types.is_numeric_dtype(group[comparator]) - # Mark any row with null comparator or null target as False - for i in range(len(target_values)): - if pd.isna(comparator_values[i]) or pd.isna(target_values[i]): - is_valid.iloc[i] = False + null_mask = group[comparator].isna() | ( + group[comparator].astype(str).str.strip() == "" + ) + non_null_rows = group[~null_mask] - # Only check ascending order on rows where both target and comparator are non-null - valid_positions = [ - i - for i in range(len(target_values)) - if not pd.isna(comparator_values[i]) and not pd.isna(target_values[i]) - ] + is_valid = self._mark_invalid_null_positions(is_valid, group, null_mask, na_pos) - for i in range(len(valid_positions) - 1): - curr_pos = valid_positions[i] - next_pos = valid_positions[i + 1] - current = target_values[curr_pos] - next_val = target_values[next_pos] + # Compare only non-null rows positionally + non_null_sorted = non_null_rows.sort_values(by=comparator, ascending=ascending) + + actual_target = non_null_rows[target].tolist() + expected_target = non_null_sorted[target].tolist() + non_null_indices = non_null_rows.index.tolist() + + for i in range(len(actual_target)): + actual = actual_target[i] + expected_val = expected_target[i] - if ( - not is_numeric_target - and is_valid_date(current) - and is_valid_date(next_val) + if pd.isna(actual) and pd.isna(expected_val): + continue + elif pd.isna(actual) or pd.isna(expected_val): + is_valid.loc[non_null_indices[i]] = False + elif ( + not is_numeric_comparator + and is_valid_date(actual) + and is_valid_date(expected_val) ): - date1, _ = parse_date(current) - date2, _ 
= parse_date(next_val) - if date1 > date2: - is_valid.iloc[curr_pos] = False - is_valid.iloc[next_pos] = False + date1, _ = parse_date(actual) + date2, _ = parse_date(expected_val) + if date1 != date2: + is_valid.loc[non_null_indices[i]] = False else: - if current > next_val: - is_valid.iloc[curr_pos] = False - is_valid.iloc[next_pos] = False - + if actual != expected_val: + is_valid.loc[non_null_indices[i]] = False + is_valid = self._verify_neighbor_consistency( + is_valid, + non_null_rows, + target, + comparator, + ascending, + is_numeric_comparator, + ) return is_valid def check_date_overlaps(self, group, target, comparator): @@ -1705,7 +1786,9 @@ def check_date_overlaps(self, group, target, comparator): valid_positions = [ i for i in range(len(comparator_values)) - if not pd.isna(comparator_values[i]) + if not ( + pd.isna(comparator_values[i]) or str(comparator_values[i]).strip() == "" + ) ] for i in range(len(valid_positions) - 1): @@ -1721,10 +1804,8 @@ def check_date_overlaps(self, group, target, comparator): if prec1 != prec2: overlaps, less_precise = dates_overlap(date1, prec1, date2, prec2) if overlaps: - if date1.startswith(less_precise): - is_valid.iloc[curr_pos] = False - elif date2.startswith(less_precise): - is_valid.iloc[next_pos] = False + is_valid.iloc[curr_pos] = False + is_valid.iloc[next_pos] = False return is_valid @@ -1768,16 +1849,6 @@ def target_is_sorted_by(self, other_value: dict): """ Check if target is in ascending order when rows are sorted by comparator. - Nulls in either target or comparator are marked False and excluded - from the ascending order check. - - Process: - 1. Sort data by within columns (always ASC) and comparator (ASC/DESC) - 2. Within each group: - - Mark null comparator or null target rows as False - - Check remaining rows: is target ascending? - - Check for date overlaps in comparator (if dates) - 3. 
Map results back to original row order """ target = other_value.get("target") within_columns = self._normalize_grouping_columns(other_value.get("within")) @@ -1788,6 +1859,7 @@ def target_is_sorted_by(self, other_value: dict): for col in columns: comparator: str = self.replace_prefix(col["name"]) ascending: bool = col["sort_order"].lower() != "desc" + na_pos: str = col.get("null_position", "last") selected_columns = list( dict.fromkeys([target, comparator, *within_columns]) @@ -1795,16 +1867,16 @@ def target_is_sorted_by(self, other_value: dict): # Sort by within columns (always ASC) and comparator in specified order sorted_df = self.value[selected_columns].sort_values( - by=[*within_columns, comparator], - ascending=[True] * len(within_columns) + [ascending], + by=[*within_columns, target], + ascending=[True] * (len(within_columns) + 1), ) grouped_df = sorted_df.groupby(within_columns, sort=False) - # Check 1: Target is ascending in sorted groups, nulls marked False + # Check 1: Target order matches expected comparator order target_check = grouped_df.apply( lambda x: self.check_target_ascending_in_sorted_group( - x, target, comparator + x, target, comparator, ascending, na_pos ) ) target_check = self._process_grouped_result( @@ -1813,7 +1885,7 @@ def target_is_sorted_by(self, other_value: dict): within_columns, sorted_df, lambda group: self.check_target_ascending_in_sorted_group( - group, target, comparator + group, target, comparator, ascending, na_pos ), ) diff --git a/resources/schema/rule/Operator.json b/resources/schema/rule/Operator.json index f18d9937f..4c6bb6c87 100644 --- a/resources/schema/rule/Operator.json +++ b/resources/schema/rule/Operator.json @@ -579,6 +579,10 @@ "items": { "properties": { "name": { "$ref": "Operator.json#/properties/name" }, + "null_position": { + "enum": ["first", "last"], + "type": "string" + }, "order": { "$ref": "Operator.json#/properties/order" } }, "type": "object" diff --git a/resources/schema/rule/Operator.md 
b/resources/schema/rule/Operator.md index a5ce269bf..c55f30576 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1120,7 +1120,7 @@ Complement of `is_ordered_by` ### target_is_sorted_by -True if the values in `name` are ordered according to the values specified by `value` in ascending/descending order, grouped by the values in `within`. Each `value` requires a variable `name` and an ordering of 'asc' or 'desc' specified by `order`. `within` accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 'YYYY-MM-DD' format +True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in wit hin.Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date values with different precisions that overlap (e.g. 2005-10 and 2005-10-08) are also flagged as not sorted as their order cannot be inferred. 
```yaml Check: @@ -1133,6 +1133,7 @@ Check: value: - name: --STDTC sort_order: asc + null_position: last ``` ### target_is_not_sorted_by diff --git a/tests/unit/test_check_operators/test_relationship_integrity_checks.py b/tests/unit/test_check_operators/test_relationship_integrity_checks.py index 4b0da3398..73745882f 100644 --- a/tests/unit/test_check_operators/test_relationship_integrity_checks.py +++ b/tests/unit/test_check_operators/test_relationship_integrity_checks.py @@ -958,26 +958,59 @@ def test_target_is_sorted_by_multiple_within_numeric(dataset_class): @pytest.mark.parametrize("dataset_class", [PandasDataset, DaskDataset]) def test_target_is_sorted_by_with_nulls(dataset_class): - """Test target_is_sorted_by handles null values correctly. - Null in either target or comparator marks that row as False, - but does not affect the ordering check of surrounding non-null rows. - """ - df = dataset_class.from_dict( + df_null_last = dataset_class.from_dict( { - "USUBJID": [123, 456, 456, 123, 123], - "SESEQ": [1, 2, 1, None, None], - "SESTDTC": ["2006-06-02", None, "2006-06-01", None, "2006-06-03"], + "USUBJID": ["001", "001", "001", "001", "002", "002", "002"], + "SESEQ": [1, 2, 3, 4, 1, 2, 3], + "SESTDTC": [ + "2006-01-01", + "2006-01-02", + "", + "2006-01-04", + "2006-01-01", + "", + "2006-01-03", + ], } ) - other_value = { + other_value_last = { "target": "--SEQ", "within": "USUBJID", - "comparator": [{"name": "--STDTC", "sort_order": "ASC"}], + "comparator": [ + {"name": "--STDTC", "sort_order": "ASC", "null_position": "last"} + ], } result = DataframeType( - {"value": df, "column_prefix_map": {"--": "SE"}} - ).target_is_sorted_by(other_value) - assert result.equals(pd.Series([True, False, True, False, False])) + {"value": df_null_last, "column_prefix_map": {"--": "SE"}} + ).target_is_sorted_by(other_value_last) + assert result.equals(pd.Series([True, True, False, True, True, False, True])) + + df_null_first = dataset_class.from_dict( + { + "USUBJID": ["001", 
"001", "001", "001", "002", "002", "002"], + "SESEQ": [1, 2, 3, 4, 1, 2, 3], + "SESTDTC": [ + "", + "2006-01-01", + "2006-01-02", + "2006-01-03", + "2006-01-01", + "", + "2006-01-03", + ], + } + ) + other_value_first = { + "target": "--SEQ", + "within": "USUBJID", + "comparator": [ + {"name": "--STDTC", "sort_order": "ASC", "null_position": "first"} + ], + } + result = DataframeType( + {"value": df_null_first, "column_prefix_map": {"--": "SE"}} + ).target_is_sorted_by(other_value_first) + assert result.equals(pd.Series([True, True, True, True, True, False, True])) @pytest.mark.parametrize( From b5b23d26c5c6f0c15baf3e342f03bdbf91b6af88 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Thu, 26 Feb 2026 14:18:44 -0500 Subject: [PATCH 2/6] numeric check --- cdisc_rules_engine/check_operators/dataframe_operators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index a068bca11..7cd1e9191 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1725,6 +1725,7 @@ def check_target_ascending_in_sorted_group( """ is_valid = pd.Series(True, index=group.index) is_numeric_comparator = pd.api.types.is_numeric_dtype(group[comparator]) + is_numeric_target = pd.api.types.is_numeric_dtype(group[target]) null_mask = group[comparator].isna() | ( group[comparator].astype(str).str.strip() == "" @@ -1749,7 +1750,7 @@ def check_target_ascending_in_sorted_group( elif pd.isna(actual) or pd.isna(expected_val): is_valid.loc[non_null_indices[i]] = False elif ( - not is_numeric_comparator + not is_numeric_target and is_valid_date(actual) and is_valid_date(expected_val) ): From 833d8f2b648db86f39c6165e0c5fa529a8e79804 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 27 Feb 2026 11:42:56 -0500 Subject: [PATCH 3/6] test update --- 
cdisc_rules_engine/check_operators/dataframe_operators.py | 6 ------ .../test_relationship_integrity_checks.py | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 7cd1e9191..2f7c44bc1 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1849,10 +1849,6 @@ def _process_grouped_result( @log_operator_execution @type_operator(FIELD_DATAFRAME) def target_is_sorted_by(self, other_value: dict): - """ - Check if target is in ascending order when rows are sorted by comparator. - - """ target = other_value.get("target") within_columns = self._normalize_grouping_columns(other_value.get("within")) columns = other_value["comparator"] @@ -1868,7 +1864,6 @@ def target_is_sorted_by(self, other_value: dict): dict.fromkeys([target, comparator, *within_columns]) ) - # Sort by within columns (always ASC) and comparator in specified order sorted_df = self.value[selected_columns].sort_values( by=[*within_columns, target], ascending=[True] * (len(within_columns) + 1), @@ -1876,7 +1871,6 @@ def target_is_sorted_by(self, other_value: dict): grouped_df = sorted_df.groupby(within_columns, sort=False) - # Check 1: Target order matches expected comparator order target_check = grouped_df.apply( lambda x: self.check_target_ascending_in_sorted_group( x, target, comparator, ascending, na_pos diff --git a/tests/unit/test_check_operators/test_relationship_integrity_checks.py b/tests/unit/test_check_operators/test_relationship_integrity_checks.py index 73745882f..78c4e4af6 100644 --- a/tests/unit/test_check_operators/test_relationship_integrity_checks.py +++ b/tests/unit/test_check_operators/test_relationship_integrity_checks.py @@ -776,7 +776,7 @@ def test_target_is_sorted_by_dates(dataset_class): "2006-06-02", "2006-06-04", "2006-06-01", - "2006-06-05", + "2006-06", 
"2006-06-03", ], } @@ -784,7 +784,7 @@ def test_target_is_sorted_by_dates(dataset_class): result = DataframeType( {"value": df_invalid, "column_prefix_map": {"--": "SE"}} ).target_is_sorted_by(other_value) - assert result.equals(pd.Series([True, False, False, True, True])) + assert result.equals(pd.Series([True, False, False, False, False])) df_desc = dataset_class.from_dict( { From 554fa703cac2c95d5d582c3848372318abcceef3 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 27 Feb 2026 11:44:54 -0500 Subject: [PATCH 4/6] docs --- resources/schema/rule/Operator.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/schema/rule/Operator.md b/resources/schema/rule/Operator.md index c55f30576..3350df494 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1120,7 +1120,7 @@ Complement of `is_ordered_by` ### target_is_sorted_by -True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in wit hin.Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date values with different precisions that overlap (e.g. 2005-10 and 2005-10-08) are also flagged as not sorted as their order cannot be inferred. +True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. 
Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date values with different precisions that overlap (e.g. 2005-10 and 2005-10-08) are flagged as not sorted as their order cannot be inferred. ```yaml Check: From 051ad08ceec1d985cb6bc02169ab79289b287a8e Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 27 Feb 2026 14:08:34 -0500 Subject: [PATCH 5/6] overlap --- .../check_operators/dataframe_operators.py | 58 ++++++++++--------- resources/schema/rule/Operator.md | 2 +- .../test_relationship_integrity_checks.py | 28 ++++++++- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 2f7c44bc1..4d022f641 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1655,7 +1655,7 @@ def _mark_invalid_null_positions(self, is_valid, group, null_mask, na_pos): first_null = min(index_order.index(i) for i in null_indices) if first_null < last_non_null: is_valid[null_mask] = False - else: # first + else: last_null = max(index_order.index(i) for i in null_indices) first_non_null = min(index_order.index(i) for i in non_null_indices) if last_null > first_non_null: @@ -1699,7 +1699,6 @@ def _verify_neighbor_consistency( ), None, ) - if not is_numeric_comparator and not is_valid_date(str(curr)): continue @@ -1734,12 +1733,20 @@ def check_target_ascending_in_sorted_group( is_valid = self._mark_invalid_null_positions(is_valid, group, null_mask, na_pos) - # Compare only non-null rows positionally - non_null_sorted = non_null_rows.sort_values(by=comparator, ascending=ascending) + # Exclude rows involved in date overlaps from positional check — + # their sort position is ambiguous and date_overlap_check handles flagging them + overlap_check = self.check_date_overlaps(group, target, 
comparator) + overlap_invalid_mask = ~overlap_check + non_null_rows_for_order_check = non_null_rows[ + ~overlap_invalid_mask[non_null_rows.index] + ] - actual_target = non_null_rows[target].tolist() + non_null_sorted = non_null_rows_for_order_check.sort_values( + by=comparator, ascending=ascending + ) + actual_target = non_null_rows_for_order_check[target].tolist() expected_target = non_null_sorted[target].tolist() - non_null_indices = non_null_rows.index.tolist() + non_null_indices = non_null_rows_for_order_check.index.tolist() for i in range(len(actual_target)): actual = actual_target[i] @@ -1761,9 +1768,11 @@ def check_target_ascending_in_sorted_group( else: if actual != expected_val: is_valid.loc[non_null_indices[i]] = False + + non_null_target_sorted = non_null_rows.sort_values(by=target, ascending=True) is_valid = self._verify_neighbor_consistency( is_valid, - non_null_rows, + non_null_target_sorted, target, comparator, ascending, @@ -1772,12 +1781,6 @@ def check_target_ascending_in_sorted_group( return is_valid def check_date_overlaps(self, group, target, comparator): - """ - Check for date overlaps in comparator column. - When dates have different precisions and overlap, mark them as invalid. - Only applies to date columns - returns all True for numeric columns. - Skips null comparator values. 
- """ comparator_values = group[comparator].tolist() is_valid = pd.Series(True, index=group.index) is_numeric = pd.api.types.is_numeric_dtype(group[comparator]) @@ -1785,7 +1788,6 @@ def check_date_overlaps(self, group, target, comparator): if is_numeric: return is_valid - # Only check non-null comparator values valid_positions = [ i for i in range(len(comparator_values)) @@ -1794,21 +1796,25 @@ def check_date_overlaps(self, group, target, comparator): ) ] - for i in range(len(valid_positions) - 1): + for i in range(len(valid_positions)): curr_pos = valid_positions[i] - next_pos = valid_positions[i + 1] current = comparator_values[curr_pos] - next_val = comparator_values[next_pos] - - if is_valid_date(current) and is_valid_date(next_val): - date1, prec1 = parse_date(current) - date2, prec2 = parse_date(next_val) - + if not is_valid_date(current): + continue + _, prec1 = parse_date(current) + for j in range(len(valid_positions)): + if i == j: + continue + other_pos = valid_positions[j] + other = comparator_values[other_pos] + if not is_valid_date(other): + continue + _, prec2 = parse_date(other) if prec1 != prec2: - overlaps, less_precise = dates_overlap(date1, prec1, date2, prec2) + overlaps, _ = dates_overlap(current, prec1, other, prec2) if overlaps: is_valid.iloc[curr_pos] = False - is_valid.iloc[next_pos] = False + is_valid.iloc[other_pos] = False return is_valid @@ -1886,7 +1892,6 @@ def target_is_sorted_by(self, other_value: dict): ), ) - # Check 2: No date overlaps in comparator (only for date columns) date_overlap_check = grouped_df.apply( lambda x: self.check_date_overlaps(x, target, comparator) ) @@ -1898,10 +1903,7 @@ def target_is_sorted_by(self, other_value: dict): lambda group: self.check_date_overlaps(group, target, comparator), ) - # Combine both checks combined_check = target_check & date_overlap_check - - # Map results back to original dataframe order result = result & combined_check.reindex(self.value.index, fill_value=True) if isinstance(result, 
(pd.DataFrame, dd.DataFrame)): diff --git a/resources/schema/rule/Operator.md b/resources/schema/rule/Operator.md index 3350df494..5218ff867 100644 --- a/resources/schema/rule/Operator.md +++ b/resources/schema/rule/Operator.md @@ -1120,7 +1120,7 @@ Complement of `is_ordered_by` ### target_is_sorted_by -True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date values with different precisions that overlap (e.g. 2005-10 and 2005-10-08) are flagged as not sorted as their order cannot be inferred. +True if the values in name are ordered according to the values specified by value in ascending/descending order, grouped by the values in within. Each value entry requires a variable name, a sort_order of asc or desc, and an optional null_position of first or last (defaults to last) which controls where null/empty comparator values are placed in the expected ordering. Within accepts either a single column or an ordered list of columns. Columns can be either number or Char Dates in ISO8601 YYYY-MM-DD format. Date value(s) with different precisions that overlap (e.g. 2005-10, 2005-10-03 and 2005-10-08) are all flagged as not sorted as their order cannot be inferred. 
```yaml Check: diff --git a/tests/unit/test_check_operators/test_relationship_integrity_checks.py b/tests/unit/test_check_operators/test_relationship_integrity_checks.py index 78c4e4af6..8e8b78752 100644 --- a/tests/unit/test_check_operators/test_relationship_integrity_checks.py +++ b/tests/unit/test_check_operators/test_relationship_integrity_checks.py @@ -784,7 +784,33 @@ def test_target_is_sorted_by_dates(dataset_class): result = DataframeType( {"value": df_invalid, "column_prefix_map": {"--": "SE"}} ).target_is_sorted_by(other_value) - assert result.equals(pd.Series([True, False, False, False, False])) + assert result.equals(pd.Series([False, False, False, False, False])) + + df_partial = dataset_class.from_dict( + { + "USUBJID": [ + "CDISC001", + "CDISC001", + "CDISC001", + "CDISC001", + "CDISC001", + "CDISC001", + ], + "SESEQ": [1, 2, 3, 4, 5, 6], + "SESTDTC": [ + "2006-05-01", + "2006-06-01", + "2006-06-03", + "2006-06", + "2006-06-05", + "2006-06-07", + ], + } + ) + result = DataframeType( + {"value": df_partial, "column_prefix_map": {"--": "SE"}} + ).target_is_sorted_by(other_value) + assert result.equals(pd.Series([True, False, False, False, False, False])) df_desc = dataset_class.from_dict( { From a2bc0c59eb06b53806cbbf705aa3da9ea22beb58 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Fri, 27 Feb 2026 14:10:13 -0500 Subject: [PATCH 6/6] comment --- cdisc_rules_engine/check_operators/dataframe_operators.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cdisc_rules_engine/check_operators/dataframe_operators.py b/cdisc_rules_engine/check_operators/dataframe_operators.py index 4d022f641..f28ff69b7 100644 --- a/cdisc_rules_engine/check_operators/dataframe_operators.py +++ b/cdisc_rules_engine/check_operators/dataframe_operators.py @@ -1732,9 +1732,6 @@ def check_target_ascending_in_sorted_group( non_null_rows = group[~null_mask] is_valid = self._mark_invalid_null_positions(is_valid, group, null_mask, na_pos) - - # Exclude rows involved in date 
overlaps from positional check — - # their sort position is ambiguous and date_overlap_check handles flagging them overlap_check = self.check_date_overlaps(group, target, comparator) overlap_invalid_mask = ~overlap_check non_null_rows_for_order_check = non_null_rows[