Docs: clarify plan vs. run behavior of blocking audits (#5197)

treysp · erindru · commit e6bc41377678 · 2025-08-21T21:30:17.000Z
diff --git a/docs/concepts/audits.md b/docs/concepts/audits.md
@@ -9,6 +9,32 @@ A comprehensive suite of audits can identify data issues upstream, whether they
 
 **NOTE**: For incremental by time range models, audits are only applied to intervals being processed - not for the entire underlying table.
 
+## Blocking Audits
+A failed blocking audit halts the execution of a `plan` or `run` to prevent invalid data from propagating to downstream models. The impact of a failure depends on whether you are running a `plan` or a `run`.
+
+SQLMesh's blocking audit process is:
+
+1. Evaluate the model (e.g., insert new data or rebuild the table)
+2. Run the audit query against the newly updated model table. For incremental models, the audit runs only on the time intervals being processed.
+3. If the query returns any rows, the audit fails, halting the `plan` or `run`.
+
+### Plan vs. Run
+
+The key difference is when the model's data is promoted to the production environment:
+
+*   **`plan`**: SQLMesh evaluates and audits all modified models *before* promoting them to production. If an audit fails, the `plan` stops, and the production table is untouched. Invalid data is contained in an isolated table and never reaches the production environment.
+
+*   **`run`**: SQLMesh evaluates and audits models directly against the production environment. If an audit fails, the `run` stops, but the invalid data *is already present* in the production table. The "blocking" action prevents this bad data from being used to build other downstream models.
+
+### Fixing a Failed Audit
+
+If a blocking audit fails during a `run`, you must fix the invalid data in the production table. To do so:
+
+1.  **Find the root cause**: examine upstream models and data sources.
+2.  **Fix the source**:
+    *   If the cause is an **external data source**, fix it there. Then, run a [restatement plan](./plans.md#restatement-plans) on the first SQLMesh model that ingests the source data. This will restate all downstream models, including the one with the failed audit.
+    *   If the cause is a **SQLMesh model**, update the model's logic. Then apply the change with a `plan`, which will automatically re-evaluate all downstream models.
+
 ## User-Defined Audits
 In SQLMesh, user-defined audits are defined in `.sql` files in an `audits` directory in your SQLMesh project. Multiple audits can be defined in a single file, so you can organize them to your liking. Alternatively, audits can be defined inline within the model definition itself.
 
diff --git a/sqlmesh/core/config/loader.py b/sqlmesh/core/config/loader.py
@@ -175,6 +175,7 @@ def load_config_from_paths(
                 project_root=dbt_project_file.parent,
                 dbt_profile_name=kwargs.pop("profile", None),
                 dbt_target_name=kwargs.pop("target", None),
+                variables=kwargs.pop("variables", None),
             )
             if type(dbt_python_config) != config_type:
                 dbt_python_config = convert_config_type(dbt_python_config, config_type)
diff --git a/sqlmesh_dbt/cli.py b/sqlmesh_dbt/cli.py
@@ -4,12 +4,31 @@
 from sqlmesh_dbt.operations import DbtOperations, create
 from sqlmesh_dbt.error import cli_global_error_handler
 from pathlib import Path
+from sqlmesh_dbt.options import YamlParamType
+import functools
 
 
-def _get_dbt_operations(ctx: click.Context) -> DbtOperations:
-    if not isinstance(ctx.obj, DbtOperations):
+def _get_dbt_operations(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]]) -> DbtOperations:
+    if not isinstance(ctx.obj, functools.partial):
         raise ValueError(f"Unexpected click context object: {type(ctx.obj)}")
-    return ctx.obj
+
+    dbt_operations = ctx.obj(vars=vars)
+
+    if not isinstance(dbt_operations, DbtOperations):
+        raise ValueError(f"Unexpected dbt operations type: {type(dbt_operations)}")
+
+    @ctx.call_on_close
+    def _cleanup() -> None:
+        dbt_operations.close()
+
+    return dbt_operations
+
+
+vars_option = click.option(
+    "--vars",
+    type=YamlParamType(),
+    help="Supply variables to the project. This argument overrides variables defined in your dbt_project.yml file. This argument should be a YAML string, eg. '{my_variable: my_value}'",
+)
 
 
 @click.group(invoke_without_command=True)
@@ -28,8 +47,9 @@ def dbt(
         # we dont need to import sqlmesh/load the project for CLI help
         return
 
-    # TODO: conditionally call create() if there are times we dont want/need to import sqlmesh and load a project
-    ctx.obj = create(project_dir=Path.cwd(), profile=profile, target=target)
+    # we have a partially applied function here because subcommands might set extra options like --vars
+    # that need to be known before we attempt to load the project
+    ctx.obj = functools.partial(create, project_dir=Path.cwd(), profile=profile, target=target)
 
     if not ctx.invoked_subcommand:
         click.echo(
@@ -44,17 +64,24 @@ def dbt(
     "--full-refresh",
     help="If specified, dbt will drop incremental models and fully-recalculate the incremental table from the model definition.",
 )
+@vars_option
 @click.pass_context
-def run(ctx: click.Context, select: t.Optional[str], full_refresh: bool) -> None:
+def run(
+    ctx: click.Context,
+    vars: t.Optional[t.Dict[str, t.Any]],
+    select: t.Optional[str],
+    full_refresh: bool,
+) -> None:
     """Compile SQL and execute against the current target database."""
-    _get_dbt_operations(ctx).run(select=select, full_refresh=full_refresh)
+    _get_dbt_operations(ctx, vars).run(select=select, full_refresh=full_refresh)
 
 
 @dbt.command(name="list")
+@vars_option
 @click.pass_context
-def list_(ctx: click.Context) -> None:
+def list_(ctx: click.Context, vars: t.Optional[t.Dict[str, t.Any]]) -> None:
     """List the resources in your project"""
-    _get_dbt_operations(ctx).list_()
+    _get_dbt_operations(ctx, vars).list_()
 
 
 @dbt.command(name="ls", hidden=True)  # hidden alias for list
diff --git a/sqlmesh_dbt/error.py b/sqlmesh_dbt/error.py
@@ -25,17 +25,5 @@ def wrapper(*args: t.List[t.Any], **kwargs: t.Any) -> t.Any:
                 sys.exit(1)
             else:
                 raise
-        finally:
-            context_or_obj = args[0]
-            sqlmesh_context = (
-                context_or_obj.obj if isinstance(context_or_obj, click.Context) else context_or_obj
-            )
-            if sqlmesh_context is not None:
-                # important to import this only if a context was created
-                # otherwise something like `sqlmesh_dbt run --help` will trigger this import because it's in the finally: block
-                from sqlmesh import Context
-
-                if isinstance(sqlmesh_context, Context):
-                    sqlmesh_context.close()
 
     return wrapper
diff --git a/sqlmesh_dbt/operations.py b/sqlmesh_dbt/operations.py
@@ -51,11 +51,15 @@ def console(self) -> DbtCliConsole:
 
         return console
 
+    def close(self) -> None:
+        self.context.close()
+
 
 def create(
     project_dir: t.Optional[Path] = None,
     profile: t.Optional[str] = None,
     target: t.Optional[str] = None,
+    vars: t.Optional[t.Dict[str, t.Any]] = None,
     debug: bool = False,
 ) -> DbtOperations:
     with Progress(transient=True) as progress:
@@ -79,7 +83,7 @@ def create(
 
         sqlmesh_context = Context(
             paths=[project_dir],
-            config_loader_kwargs=dict(profile=profile, target=target),
+            config_loader_kwargs=dict(profile=profile, target=target, variables=vars),
             load=True,
         )
 
diff --git a/sqlmesh_dbt/options.py b/sqlmesh_dbt/options.py
@@ -0,0 +1,27 @@
+import typing as t
+import click
+
+if t.TYPE_CHECKING:
+    from click.core import Context, Parameter
+
+
+class YamlParamType(click.ParamType):
+    name = "yaml"
+
+    def convert(
+        self, value: t.Any, param: t.Optional[Parameter], ctx: t.Optional[Context]
+    ) -> t.Any:
+        if not isinstance(value, str):
+            self.fail(f"Input value '{value}' should be a string", param, ctx)
+
+        from sqlmesh.utils import yaml
+
+        try:
+            parsed = yaml.load(source=value, render_jinja=False)
+        except:
+            self.fail(f"String '{value}' is not valid YAML", param, ctx)
+
+        if not isinstance(parsed, dict):
+            self.fail(f"String '{value}' did not evaluate to a dict, got: {parsed}", param, ctx)
+
+        return parsed
diff --git a/tests/dbt/cli/test_operations.py b/tests/dbt/cli/test_operations.py
@@ -69,3 +69,17 @@ def test_create_can_specify_profile_and_target(jaffle_shop_duckdb: Path):
 
     assert dbt_project.context.profile_name == "jaffle_shop"
     assert dbt_project.context.target_name == "dev"
+
+
+def test_create_can_set_project_variables(jaffle_shop_duckdb: Path):
+    (jaffle_shop_duckdb / "models" / "test_model.sql").write_text("""
+       select '{{ var('foo') }}' as a
+    """)
+
+    dbt_project = create(vars={"foo": "bar"})
+    assert dbt_project.context.config.variables["foo"] == "bar"
+
+    test_model = dbt_project.context.models['"jaffle_shop"."main"."test_model"']
+    query = test_model.render_query()
+    assert query is not None
+    assert query.sql() == "SELECT 'bar' AS \"a\""

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -175,6 +175,7 @@ def load_config_from_paths(` |
| `175` | `175` | `project_root=dbt_project_file.parent,` |
| `176` | `176` | `dbt_profile_name=kwargs.pop("profile", None),` |
| `177` | `177` | `dbt_target_name=kwargs.pop("target", None),` |
| | `178` | `+ variables=kwargs.pop("variables", None),` |
| `178` | `179` | `)` |
| `179` | `180` | `if type(dbt_python_config) != config_type:` |
| `180` | `181` | `dbt_python_config = convert_config_type(dbt_python_config, config_type)` |