Skip to content

Commit fd915e9

Browse files
committed
Tidy up
1 parent 7f7fe61 commit fd915e9

File tree

10 files changed

+57
-58
lines changed

10 files changed

+57
-58
lines changed

.circleci/continue_config.yml

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ jobs:
239239
- checkout
240240
- run:
241241
name: Install OS-level dependencies
242-
command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>"
242+
command: ./.circleci/install-prerequisites.sh "<< parameters.engine >>"
243243
- run:
244244
name: Generate database name
245245
command: |
@@ -297,9 +297,8 @@ workflows:
297297
name: cloud_engine_<< matrix.engine >>
298298
context:
299299
- sqlmesh_cloud_database_integration
300-
# TODO: uncomment this
301-
# requires:
302-
# - engine_tests_docker
300+
requires:
301+
- engine_tests_docker
303302
matrix:
304303
parameters:
305304
engine:
@@ -308,14 +307,13 @@ workflows:
308307
- redshift
309308
- bigquery
310309
- clickhouse-cloud
311-
- athena
310+
- athena
312311
- fabric
313312
- gcp-postgres
314-
# TODO: uncomment this
315-
# filters:
316-
# branches:
317-
# only:
318-
# - main
313+
filters:
314+
branches:
315+
only:
316+
- main
319317
- ui_style
320318
- ui_test
321319
- vscode_test

docs/integrations/engines/snowflake.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,14 @@ And confirm that our schemas and objects exist in the Snowflake catalog:
250250

251251
Congratulations - your SQLMesh project is up and running on Snowflake!
252252

253+
### Where are the row counts?
254+
255+
SQLMesh reports the number of rows processed by each model in its `plan` and `run` terminal output.
256+
257+
However, due to limitations in the Snowflake Python connector, row counts cannot be determined for `CREATE TABLE AS` statements. Therefore, SQLMesh does not report row counts for certain model kinds, such as `FULL` models.
258+
259+
Learn more about the connector limitation [on GitHub](https://github.com/snowflakedb/snowflake-connector-python/issues/645).
260+
253261
## Local/Built-in Scheduler
254262
**Engine Adapter Type**: `snowflake`
255263

sqlmesh/core/console.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4030,9 +4030,7 @@ def show_table_diff_summary(self, table_diff: TableDiff) -> None:
40304030
self._write(f"Join On: {keys}")
40314031

40324032

4033-
# TODO: remove this
4034-
# _CONSOLE: Console = NoopConsole()
4035-
_CONSOLE: Console = TerminalConsole()
4033+
_CONSOLE: Console = NoopConsole()
40364034

40374035

40384036
def set_console(console: Console) -> None:

sqlmesh/core/engine_adapter/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2464,7 +2464,7 @@ def _execute(self, sql: str, track_rows_processed: bool = False, **kwargs: t.Any
24642464
try:
24652465
rowcount = int(rowcount_raw)
24662466
except (TypeError, ValueError):
2467-
pass
2467+
return
24682468

24692469
self._record_execution_stats(sql, rowcount)
24702470

sqlmesh/core/engine_adapter/snowflake.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -680,21 +680,25 @@ def _record_execution_stats(
680680
681681
We do not want to record the incorrect row count of 1, so we check whether that row contains the table
682682
successfully created string. If so, we return early and do not record the row count.
683+
684+
Ref: https://github.com/snowflakedb/snowflake-connector-python/issues/645
683685
"""
684686
if rowcount == 1:
685687
results = self.cursor.fetchone()
686688
if results:
687689
try:
688690
results_str = str(results[0])
689-
except (ValueError, TypeError):
691+
except (TypeError, ValueError, IndexError):
690692
return
691693

692694
# Snowflake identifiers may be:
693695
# - An unquoted contiguous set of [a-zA-Z0-9_$] characters
694696
# - A double-quoted string that may contain spaces and nested double-quotes represented by `""`. Example: " my ""table"" name "
695-
is_created = re.match(r'Table [a-zA-Z0-9_$"]*? successfully created\.', results_str)
697+
is_created = re.match(
698+
r'Table [a-zA-Z0-9_$ "]*? successfully created\.', results_str
699+
)
696700
is_already_exists = re.match(
697-
r'[a-zA-Z0-9_$"]*? already exists, statement succeeded\.',
701+
r'[a-zA-Z0-9_$ "]*? already exists, statement succeeded\.',
698702
results_str,
699703
)
700704
if is_created or is_already_exists:

sqlmesh/core/scheduler.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -514,21 +514,18 @@ def run_node(node: SchedulingUnit) -> None:
514514
execution_time=execution_time,
515515
)
516516
else:
517-
with self.snapshot_evaluator.execution_tracker.track_execution(
518-
f"{snapshot.name}_{node.batch_index}"
519-
) as execution_context:
520-
audit_results = self.evaluate(
521-
snapshot=snapshot,
522-
environment_naming_info=environment_naming_info,
523-
start=start,
524-
end=end,
525-
execution_time=execution_time,
526-
deployability_index=deployability_index,
527-
batch_index=node.batch_index,
528-
allow_destructive_snapshots=allow_destructive_snapshots,
529-
allow_additive_snapshots=allow_additive_snapshots,
530-
target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
531-
)
517+
audit_results = self.evaluate(
518+
snapshot=snapshot,
519+
environment_naming_info=environment_naming_info,
520+
start=start,
521+
end=end,
522+
execution_time=execution_time,
523+
deployability_index=deployability_index,
524+
batch_index=node.batch_index,
525+
allow_destructive_snapshots=allow_destructive_snapshots,
526+
allow_additive_snapshots=allow_additive_snapshots,
527+
target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
528+
)
532529

533530
evaluation_duration_ms = now_timestamp() - execution_start_ts
534531
finally:
@@ -547,6 +544,9 @@ def run_node(node: SchedulingUnit) -> None:
547544
num_audits - num_audits_failed,
548545
num_audits_failed,
549546
execution_stats=execution_stats,
547+
auto_restatement_triggers=auto_restatement_triggers.get(
548+
snapshot.snapshot_id
549+
),
550550
)
551551
elif isinstance(node, CreateNode):
552552
self.snapshot_evaluator.create_snapshot(

sqlmesh/core/snapshot/execution_tracker.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import time
43
import typing as t
54
from contextlib import contextmanager
65
from threading import local, Lock
@@ -12,10 +11,6 @@ class QueryExecutionStats:
1211
snapshot_batch_id: str
1312
total_rows_processed: t.Optional[int] = None
1413
total_bytes_processed: t.Optional[int] = None
15-
query_count: int = 0
16-
queries_executed: t.List[t.Tuple[str, t.Optional[int], t.Optional[int], float]] = field(
17-
default_factory=list
18-
)
1914

2015

2116
@dataclass
@@ -26,10 +21,8 @@ class QueryExecutionContext:
2621
It accumulates statistics from multiple cursor.execute() calls during a single snapshot evaluation.
2722
2823
Attributes:
29-
id: Identifier linking this context to a specific operation
30-
total_rows_processed: Running sum of cursor.rowcount from all executed queries during evaluation
31-
query_count: Total number of SQL statements executed
32-
queries_executed: List of (sql_snippet, row_count, timestamp) tuples for debugging
24+
snapshot_batch_id: Identifier linking this context to a specific snapshot evaluation
25+
stats: Running sum of cursor.rowcount and possibly bytes processed from all executed queries during evaluation
3326
"""
3427

3528
snapshot_batch_id: str
@@ -55,20 +48,12 @@ def add_execution(
5548
else:
5649
self.stats.total_bytes_processed += bytes_processed
5750

58-
self.stats.query_count += 1
59-
# TODO: remove this
60-
# for debugging
61-
self.stats.queries_executed.append((sql[:300], row_count, bytes_processed, time.time()))
62-
6351
def get_execution_stats(self) -> QueryExecutionStats:
6452
return self.stats
6553

6654

6755
class QueryExecutionTracker:
68-
"""
69-
Thread-local context manager for snapshot execution statistics, such as
70-
rows processed.
71-
"""
56+
"""Thread-local context manager for snapshot execution statistics, such as rows processed."""
7257

7358
_thread_local = local()
7459
_contexts: t.Dict[str, QueryExecutionContext] = {}
@@ -86,9 +71,7 @@ def is_tracking(cls) -> bool:
8671
def track_execution(
8772
self, snapshot_id_batch: str
8873
) -> t.Iterator[t.Optional[QueryExecutionContext]]:
89-
"""
90-
Context manager for tracking snapshot execution statistics.
91-
"""
74+
"""Context manager for tracking snapshot execution statistics such as row counts and bytes processed."""
9275
context = QueryExecutionContext(snapshot_batch_id=snapshot_id_batch)
9376
self._thread_local.context = context
9477
with self._contexts_lock:

tests/core/engine_adapter/integration/test_integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,7 @@ def capture_execution_stats(
24342434
num_audits_failed,
24352435
audit_only=False,
24362436
execution_stats=None,
2437+
auto_restatement_triggers=None,
24372438
):
24382439
if execution_stats is not None:
24392440
actual_execution_stats[snapshot.model.name.replace(f"{schema_name}.", "")] = (

tests/core/engine_adapter/integration/test_integration_snowflake.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,12 @@
1212
from sqlmesh.core.plan import Plan
1313
from tests.core.engine_adapter.integration import TestContext
1414
from sqlmesh import model, ExecutionContext
15+
from pytest_mock import MockerFixture
16+
from sqlmesh.core.snapshot.execution_tracker import (
17+
QueryExecutionContext,
18+
QueryExecutionTracker,
19+
)
1520
from sqlmesh.core.model import ModelKindName
16-
from sqlmesh.core.snapshot.execution_tracker import QueryExecutionTracker
1721
from datetime import datetime
1822

1923
from tests.core.engine_adapter.integration import (
@@ -310,10 +314,14 @@ def fetch_database_names() -> t.Set[str]:
310314
assert fetch_database_names() == {non_sqlmesh_managed_catalog}
311315

312316

313-
def test_rows_tracker(ctx: TestContext, engine_adapter: SnowflakeEngineAdapter):
317+
def test_rows_tracker(
318+
ctx: TestContext, engine_adapter: SnowflakeEngineAdapter, mocker: MockerFixture
319+
):
314320
sqlmesh = ctx.create_context()
315321
tracker = QueryExecutionTracker()
316322

323+
add_execution_spy = mocker.spy(QueryExecutionContext, "add_execution")
324+
317325
with tracker.track_execution("a"):
318326
# Snowflake doesn't report row counts for CTAS, so this should not be tracked
319327
engine_adapter.execute(
@@ -322,7 +330,8 @@ def test_rows_tracker(ctx: TestContext, engine_adapter: SnowflakeEngineAdapter):
322330
engine_adapter.execute("INSERT INTO a VALUES (2), (3)", track_rows_processed=True)
323331
engine_adapter.execute("INSERT INTO a VALUES (4)", track_rows_processed=True)
324332

333+
assert add_execution_spy.call_count == 2
334+
325335
stats = tracker.get_execution_stats("a")
326336
assert stats is not None
327-
assert stats.query_count == 2
328337
assert stats.total_rows_processed == 3

tests/core/test_execution_tracker.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,4 @@ def worker(id: str, row_counts: list[int]) -> QueryExecutionStats:
3434
by_batch = {s.snapshot_batch_id: s for s in results}
3535

3636
assert by_batch["batch_A"].total_rows_processed == 15
37-
assert by_batch["batch_A"].query_count == 2
3837
assert by_batch["batch_B"].total_rows_processed == 10
39-
assert by_batch["batch_B"].query_count == 2

0 commit comments

Comments (0)