Tidy up

treysp · treysp · commit f072873997db · 2025-08-22T14:29:37.000-05:00
diff --git a/docs/integrations/engines/snowflake.md b/docs/integrations/engines/snowflake.md
@@ -250,6 +250,14 @@ And confirm that our schemas and objects exist in the Snowflake catalog:
 
 Congratulations - your SQLMesh project is up and running on Snowflake!
 
+### Where are the row counts?
+
+SQLMesh reports the number of rows processed by each model in its `plan` and `run` terminal output.
+
+However, due to limitations in the Snowflake Python connector, row counts cannot be determined for `CREATE TABLE AS` statements. Therefore, SQLMesh does not report row counts for certain model kinds, such as `FULL` models.
+
+Learn more about this connector limitation in this [GitHub issue](https://github.com/snowflakedb/snowflake-connector-python/issues/645).
+
 ## Local/Built-in Scheduler
 **Engine Adapter Type**: `snowflake`
 
diff --git a/sqlmesh/core/engine_adapter/base.py b/sqlmesh/core/engine_adapter/base.py
@@ -2464,7 +2464,7 @@ def _execute(self, sql: str, track_rows_processed: bool = False, **kwargs: t.Any
                 try:
                     rowcount = int(rowcount_raw)
                 except (TypeError, ValueError):
-                    pass
+                    return
 
             self._record_execution_stats(sql, rowcount)
 
diff --git a/sqlmesh/core/engine_adapter/snowflake.py b/sqlmesh/core/engine_adapter/snowflake.py
@@ -686,15 +686,17 @@ def _record_execution_stats(
             if results:
                 try:
                     results_str = str(results[0])
-                except (ValueError, TypeError):
+                except (TypeError, ValueError, IndexError):
                     return
 
                 # Snowflake identifiers may be:
                 # - An unquoted contiguous set of [a-zA-Z0-9_$] characters
                 # - A double-quoted string that may contain spaces and nested double-quotes represented by `""`. Example: " my ""table"" name "
-                is_created = re.match(r'Table [a-zA-Z0-9_$"]*? successfully created\.', results_str)
+                is_created = re.match(
+                    r'Table [a-zA-Z0-9_$ "]*? successfully created\.', results_str
+                )
                 is_already_exists = re.match(
-                    r'[a-zA-Z0-9_$"]*? already exists, statement succeeded\.',
+                    r'[a-zA-Z0-9_$ "]*? already exists, statement succeeded\.',
                     results_str,
                 )
                 if is_created or is_already_exists:
diff --git a/sqlmesh/core/scheduler.py b/sqlmesh/core/scheduler.py
@@ -514,21 +514,18 @@ def run_node(node: SchedulingUnit) -> None:
                             execution_time=execution_time,
                         )
                     else:
-                        with self.snapshot_evaluator.execution_tracker.track_execution(
-                            f"{snapshot.name}_{node.batch_index}"
-                        ) as execution_context:
-                            audit_results = self.evaluate(
-                                snapshot=snapshot,
-                                environment_naming_info=environment_naming_info,
-                                start=start,
-                                end=end,
-                                execution_time=execution_time,
-                                deployability_index=deployability_index,
-                                batch_index=node.batch_index,
-                                allow_destructive_snapshots=allow_destructive_snapshots,
-                                allow_additive_snapshots=allow_additive_snapshots,
-                                target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
-                            )
+                        audit_results = self.evaluate(
+                            snapshot=snapshot,
+                            environment_naming_info=environment_naming_info,
+                            start=start,
+                            end=end,
+                            execution_time=execution_time,
+                            deployability_index=deployability_index,
+                            batch_index=node.batch_index,
+                            allow_destructive_snapshots=allow_destructive_snapshots,
+                            allow_additive_snapshots=allow_additive_snapshots,
+                            target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
+                        )
 
                     evaluation_duration_ms = now_timestamp() - execution_start_ts
                 finally:
@@ -547,6 +544,9 @@ def run_node(node: SchedulingUnit) -> None:
                         num_audits - num_audits_failed,
                         num_audits_failed,
                         execution_stats=execution_stats,
+                        auto_restatement_triggers=auto_restatement_triggers.get(
+                            snapshot.snapshot_id
+                        ),
                     )
             elif isinstance(node, CreateNode):
                 self.snapshot_evaluator.create_snapshot(
diff --git a/sqlmesh/core/snapshot/execution_tracker.py b/sqlmesh/core/snapshot/execution_tracker.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import time
 import typing as t
 from contextlib import contextmanager
 from threading import local, Lock
@@ -12,10 +11,6 @@ class QueryExecutionStats:
     snapshot_batch_id: str
     total_rows_processed: t.Optional[int] = None
     total_bytes_processed: t.Optional[int] = None
-    query_count: int = 0
-    queries_executed: t.List[t.Tuple[str, t.Optional[int], t.Optional[int], float]] = field(
-        default_factory=list
-    )
 
 
 @dataclass
@@ -26,10 +21,8 @@ class QueryExecutionContext:
     It accumulates statistics from multiple cursor.execute() calls during a single snapshot evaluation.
 
     Attributes:
-        id: Identifier linking this context to a specific operation
-        total_rows_processed: Running sum of cursor.rowcount from all executed queries during evaluation
-        query_count: Total number of SQL statements executed
-        queries_executed: List of (sql_snippet, row_count, timestamp) tuples for debugging
+        snapshot_batch_id: Identifier linking this context to a specific snapshot evaluation
+        stats: Running sum of cursor.rowcount and possibly bytes processed from all executed queries during evaluation
     """
 
     snapshot_batch_id: str
@@ -55,20 +48,12 @@ def add_execution(
                 else:
                     self.stats.total_bytes_processed += bytes_processed
 
-        self.stats.query_count += 1
-        # TODO: remove this
-        # for debugging
-        self.stats.queries_executed.append((sql[:300], row_count, bytes_processed, time.time()))
-
     def get_execution_stats(self) -> QueryExecutionStats:
         return self.stats
 
 
 class QueryExecutionTracker:
-    """
-    Thread-local context manager for snapshot execution statistics, such as
-    rows processed.
-    """
+    """Thread-local context manager for snapshot execution statistics, such as rows processed."""
 
     _thread_local = local()
     _contexts: t.Dict[str, QueryExecutionContext] = {}
@@ -86,9 +71,7 @@ def is_tracking(cls) -> bool:
     def track_execution(
         self, snapshot_id_batch: str
     ) -> t.Iterator[t.Optional[QueryExecutionContext]]:
-        """
-        Context manager for tracking snapshot execution statistics.
-        """
+        """Context manager for tracking snapshot execution statistics such as row counts and bytes processed."""
         context = QueryExecutionContext(snapshot_batch_id=snapshot_id_batch)
         self._thread_local.context = context
         with self._contexts_lock:
diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py
@@ -2434,6 +2434,7 @@ def capture_execution_stats(
         num_audits_failed,
         audit_only=False,
         execution_stats=None,
+        auto_restatement_triggers=None,
     ):
         if execution_stats is not None:
             actual_execution_stats[snapshot.model.name.replace(f"{schema_name}.", "")] = (
diff --git a/tests/core/engine_adapter/integration/test_integration_snowflake.py b/tests/core/engine_adapter/integration/test_integration_snowflake.py
@@ -324,5 +324,4 @@ def test_rows_tracker(ctx: TestContext, engine_adapter: SnowflakeEngineAdapter):
 
     stats = tracker.get_execution_stats("a")
     assert stats is not None
-    assert stats.query_count == 2
     assert stats.total_rows_processed == 3
diff --git a/tests/core/test_execution_tracker.py b/tests/core/test_execution_tracker.py
@@ -34,6 +34,4 @@ def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
     by_batch = {s.snapshot_batch_id: s for s in results}
 
     assert by_batch["batch_A"].total_rows_processed == 15
-    assert by_batch["batch_A"].query_count == 2
     assert by_batch["batch_B"].total_rows_processed == 10
-    assert by_batch["batch_B"].query_count == 2