cdisc-org · SFJohnson24 · Feb 24, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/...les_engine/dataset_builders/variables_metadata_with_define_and_library_dataset_builder.py b/...les_engine/dataset_builders/variables_metadata_with_define_and_library_dataset_builder.py
@@ -13,6 +13,8 @@ def build(self):
         variable_label
         variable_size
         variable_data_type
+        variable_is_empty
+        variable_has_empty_values
         define_variable_name,
         define_variable_label,
         define_variable_data_type,
@@ -29,7 +31,6 @@ def build(self):
         define_variable_codelist_coded_values,
         define_variable_codelist_coded_codes,
         define_variable_mandatory,
-        variable_has_empty_values
         library_variable_name,
         library_variable_label,
         library_variable_data_type,
@@ -82,24 +83,22 @@ def build(self):
             right_on="library_variable_name",
         ).fillna("")
 
-        final_dataframe["variable_has_empty_values"] = final_dataframe.apply(
-            lambda row: self.variable_has_null_values(
-                (
-                    row["variable_name"]
-                    if row["variable_name"] != ""
-                    else row["library_variable_name"]
+        final_dataframe[["variable_has_empty_values", "variable_is_empty"]] = (
+            final_dataframe.apply(
+                lambda row: self.get_variable_null_stats(
+                    row["variable_name"], dataset_contents
                 ),
-                dataset_contents,
-            ),
-            axis=1,
+                axis=1,
+                result_type="expand",
+            )
         )
 
         return final_dataframe
 
-    def variable_has_null_values(
+    def get_variable_null_stats(
         self, variable: str, content: DatasetInterface
-    ) -> bool:
+    ) -> tuple[bool, bool]:
         if variable not in content:
-            return True
-        series = content[variable]
-        return series.mask(series == "").isnull().any()
+            return True, True
+        series = content[variable].mask(content[variable] == "")
+        return series.isnull().any(), series.isnull().all()
diff --git a/cdisc_rules_engine/dataset_builders/variables_metadata_with_library_metadata.py b/cdisc_rules_engine/dataset_builders/variables_metadata_with_library_metadata.py
@@ -13,6 +13,7 @@ def build(self):
         variable_size
         variable_data_type
         variable_has_empty_values
+        variable_is_empty
         library_variable_name,
         library_variable_label,
         library_variable_data_type,
@@ -57,18 +58,20 @@ def build(self):
             right_on="library_variable_name",
         ).fillna("")
 
-        data["variable_has_empty_values"] = data.apply(
-            lambda row: self.variable_has_null_values(
+        data[["variable_has_empty_values", "variable_is_empty"]] = data.apply(
+            lambda row: self.get_variable_null_stats(
                 row["variable_name"], dataset_contents
             ),
             axis=1,
+            result_type="expand",
         )
+
         return data
 
-    def variable_has_null_values(
+    def get_variable_null_stats(
         self, variable: str, content: DatasetInterface
-    ) -> bool:
+    ) -> tuple[bool, bool]:
         if variable not in content:
-            return True
-        series = content[variable]
-        return series.mask(series == "").isnull().any()
+            return True, True
+        series = content[variable].mask(content[variable] == "")
+        return series.isnull().any(), series.isnull().all()
diff --git a/cdisc_rules_engine/models/operation_params.py b/cdisc_rules_engine/models/operation_params.py
@@ -56,6 +56,7 @@ class OperationParams:
     original_target: str = None
     regex: str = None
     returntype: str = None
+    source: str = None
     target: str = None
     value_is_reference: bool = False
     namespace: str = None

diff --git a/cdisc_rules_engine/operations/base_operation.py b/cdisc_rules_engine/operations/base_operation.py
@@ -173,10 +173,10 @@ def _filter_data(self, data):
     def _is_wildcard_pattern(self, value: str) -> bool:
         if not isinstance(value, str):
             return False
-        return value.endswith("%")
+        return value.endswith("&")
 
     def _apply_wildcard_filter(self, series: pd.Series, pattern: str) -> pd.Series:
-        prefix = pattern.rstrip("%")
+        prefix = pattern.rstrip("&")
         result = series.str.startswith(prefix, na=False)
         return result
 

diff --git a/cdisc_rules_engine/operations/variable_is_null.py b/cdisc_rules_engine/operations/variable_is_null.py
@@ -3,19 +3,16 @@
 
 class VariableIsNull(BaseOperation):
     def _execute_operation(self):
-        # Always get the content dataframe. Similar to variable_exists check
-        dataframe = self.data_service.get_dataset(dataset_name=self.params.dataset_path)
-        if self.params.target.startswith("define_variable"):
-            # Handle checks against define metadata
-            target_column = self.evaluation_dataset[self.params.target]
-            result = [
-                self._is_target_variable_null(dataframe, value)
-                for value in target_column
-            ]
-            return self.data_service.dataset_implementation().convert_to_series(result)
+        if self.params.source == "submission":
+            if self.params.level == "row":
+                raise ValueError("level: row may only be used with source: evaluation")
+            dataframe = self.data_service.get_dataset(
+                dataset_name=self.params.dataset_path
+            )
         else:
-            target_variable = self.params.target
-            return self._is_target_variable_null(dataframe, target_variable)
+            dataframe = self.evaluation_dataset
+
+        return self._is_target_variable_null(dataframe, self.params.target)
 
     def _is_target_variable_null(self, dataframe, target_variable: str) -> bool:
         if target_variable not in dataframe:

diff --git a/cdisc_rules_engine/utilities/rule_processor.py b/cdisc_rules_engine/utilities/rule_processor.py
@@ -393,6 +393,7 @@ def perform_rule_operations(
                 original_target=original_target,
                 regex=operation.get("regex"),
                 returntype=operation.get("returntype"),
+                source=operation.get("source"),
                 standard=standard,
                 standard_substandard=standard_substandard,
                 standard_version=standard_version,

diff --git a/resources/schema/rule/MetaVariables.json b/resources/schema/rule/MetaVariables.json
@@ -146,6 +146,7 @@
     },
     { "const": "variable_format" },
     { "const": "variable_has_empty_values" },
+    { "const": "variable_is_empty" },
     { "const": "variable_label" },
     { "const": "variable_name" },
     {

diff --git a/resources/schema/rule/MetaVariables.md b/resources/schema/rule/MetaVariables.md
@@ -238,6 +238,10 @@ Variable format
 
 True/False value indicating whether a variable has any empty values
 
+## variable_is_empty
+
+True/False value indicating whether a variable is completely empty
+
 ## variable_label
 
 Variable long label

diff --git a/resources/schema/rule/Operations.json b/resources/schema/rule/Operations.json
@@ -557,6 +557,9 @@
       "type": "string",
       "enum": ["code", "value", "pref_term"]
     },
+    "source": {
+      "type": "string"
+    },
     "term_value": {
       "type": "string"
     },

diff --git a/resources/schema/rule/Operations.md b/resources/schema/rule/Operations.md
@@ -1013,7 +1013,7 @@ Operations:
 
 ### record_count
 
-If no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. If group is provided, returns the number of rows matching each unique set of the grouping variables. These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.
+If no filter or group is provided, returns the number of records in the dataset. If filter is provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter. A filter value may end with the wildcard `&`, in which case it matches every value that starts with the preceding prefix (see 4th example below). If group is provided, returns the number of rows matching each unique set of the grouping variables. These can be static column name(s) or can be derived from other operations like get_dataset_filtered_variables.
 
 If both filter and group are provided, returns the number of records in the dataset that contain the value(s) in the corresponding column(s) provided in the filter that also match each unique set of the grouping variables.
 
@@ -1058,7 +1058,7 @@ Example: return the number of records where QNAM starts with "RACE" (matches RAC
 - operation: record_count
   id: $race_records_in_dataset
   filter:
-    QNAM: "RACE%"
+    QNAM: "RACE&"
   group:
     - "USUBJID"
 ```
@@ -1291,7 +1291,7 @@ Match Datasets:
 
 ### variable_exists
 
-Flag an error if MIDS is in the dataset currently being evaluated and the TM domain is not present in the study
+Operates only on the original submission datasets, regardless of rule type. Flags an error if a column exists in the submission dataset currently being evaluated.
 
 Rule Type: Domain Presence Check
 
@@ -1312,13 +1312,18 @@ Operations:
 ### variable_is_null
 
 Returns true if a variable is missing from the dataset or if all values within the variable are null or empty string. This operation first checks if the target variable exists in the dataset, and if it does exist, evaluates whether all its values are null or empty.
-The operation can work with both direct variable names and define metadata references (variables starting with "define_variable").
+The operation supports two sources via the `source` parameter:
+
+- **`submission`**: checks against the raw submission dataset
+- **`evaluation`** (default): checks against the evaluation dataset built based on the rule type
 
 ```yaml
+# Dataset level check - is this variable entirely null/missing from the source data?
 Operations:
   - operator: variable_is_null
     name: USUBJID
-    id: $aeterm_is_null
+    id: $usubjid_is_null
+    source: submission
 ```
 
 ### get_xhtml_errors

diff --git a/resources/schema/rule/Rule_Type.md b/resources/schema/rule/Rule_Type.md
@@ -555,6 +555,7 @@ Attach define xml metadata at variable level
 - `variable_data_type`
 - `variable_format`
 - `variable_has_empty_values`
+- `variable_is_empty`
 - `library_variable_name`
 - `library_variable_role`
 - `library_variable_label`
@@ -572,6 +573,8 @@ Attach define xml metadata at variable level
 - `variable_size`
 - `variable_order_number`
 - `variable_data_type`
+- `variable_has_empty_values`
+- `variable_is_empty`
 - `define_variable_name`
 - `define_variable_label`
 - `define_variable_data_type`
@@ -597,7 +600,6 @@ Attach define xml metadata at variable level
 - `library_variable_order_number`
 - `library_variable_data_type`
 - `library_variable_ccode`
-- `variable_has_empty_values`
 
 ## JSON Schema Check