From 6ce1eb4b07fa03f24087817602adb6e4aa86ac24 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Mon, 8 Dec 2025 14:06:05 -0500 Subject: [PATCH 1/2] record count refactor --- cdisc_rules_engine/operations/record_count.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/cdisc_rules_engine/operations/record_count.py b/cdisc_rules_engine/operations/record_count.py index 9517ff412..716edd8f0 100644 --- a/cdisc_rules_engine/operations/record_count.py +++ b/cdisc_rules_engine/operations/record_count.py @@ -28,7 +28,7 @@ def _execute_operation(self) -> pd.Series: if self.params.regex else effective_grouping ) - if self.params.regex: + if self.params.regex and not filtered: group_df = self._get_regex_grouped_counts( self.params.dataframe, grouping_for_operations ) @@ -102,19 +102,12 @@ def _get_regex_grouped_counts(self, dataframe, grouping_columns): grouped_counts[col] = grouped_counts[col].astype( df_for_grouping[col].dtype ) - original_with_idx = dataframe[grouping_columns].copy() - original_with_idx["_idx"] = range(len(original_with_idx)) - transformed_with_idx = df_for_grouping.copy() - transformed_with_idx["_idx"] = range(len(transformed_with_idx)) - transformed_with_counts = transformed_with_idx.merge( + transformed_with_counts = df_for_grouping.merge( grouped_counts, on=grouping_columns, how="left" ) - original_with_idx["size"] = transformed_with_counts["size"].values - result = ( - original_with_idx.drop(columns=["_idx"]) - .groupby(grouping_columns, as_index=False, dropna=False) - .first() - ) + result = dataframe[grouping_columns].copy() + result["size"] = transformed_with_counts["size"].values + result = result.groupby(grouping_columns, as_index=False, dropna=False).first() return result def _apply_regex_to_grouping_columns( From 98e810495c58f98c3626243be60c0b30ca172b14 Mon Sep 17 00:00:00 2001 From: Samuel Johnson Date: Tue, 9 Dec 2025 15:25:39 -0500 Subject: [PATCH 2/2] none check --- cdisc_rules_engine/operations/record_count.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cdisc_rules_engine/operations/record_count.py b/cdisc_rules_engine/operations/record_count.py index 716edd8f0..9b9e29827 100644 --- a/cdisc_rules_engine/operations/record_count.py +++ b/cdisc_rules_engine/operations/record_count.py @@ -28,7 +28,7 @@ def _execute_operation(self) -> pd.Series: if self.params.regex else effective_grouping ) - if self.params.regex and not filtered: + if self.params.regex and filtered is None: group_df = self._get_regex_grouped_counts( self.params.dataframe, grouping_for_operations )