Fix!: depend on all attributes of dbt model when passed to a macro

georgesittas · georgesittas · commit fd4ec214dc30 · 2025-09-01T15:48:58.000+03:00
diff --git a/examples/sushi_dbt/macros/check_model_is_table.sql b/examples/sushi_dbt/macros/check_model_is_table.sql
@@ -0,0 +1,15 @@
+{%- macro check_model_is_table(model) -%} {#- Raises a compiler error unless model.config.materialized is 'table'. -#}
+    {%- if model.config.materialized != 'table' -%}
+        {%- do exceptions.raise_compiler_error(
+            "Model must use the table materialization. Please check any model overrides."
+        ) -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro check_model_is_table_alt(foo) -%} {#- Same check as check_model_is_table, with the argument named `foo` instead of `model`. -#}
+    {%- if foo.config.materialized != 'table' -%}
+        {%- do exceptions.raise_compiler_error(
+            "Model must use the table materialization. Please check any model overrides."
+        ) -%}
+    {%- endif -%}
+{%- endmacro -%}
diff --git a/examples/sushi_dbt/models/customers.sql b/examples/sushi_dbt/models/customers.sql
@@ -1,3 +1,9 @@
+{{ check_model_is_table(model) }}
+
+{% if 'DISTINCT' in model.raw_code %}
+  {{ check_model_is_table_alt(model) }}
+{% endif %}
+
 SELECT DISTINCT
   customer_id::INT AS customer_id
 FROM {{ ref('orders') }} as o
diff --git a/sqlmesh/core/renderer.py b/sqlmesh/core/renderer.py
@@ -188,30 +188,32 @@ def _resolve_table(table: str | exp.Table) -> str:
         }
 
         variables = kwargs.pop("variables", {})
-        jinja_env_kwargs = {
-            **{
-                **render_kwargs,
-                **_prepare_python_env_for_jinja(macro_evaluator, self._python_env),
-                **variables,
-            },
-            "snapshots": snapshots or {},
-            "table_mapping": table_mapping,
-            "deployability_index": deployability_index,
-            "default_catalog": self._default_catalog,
-            "runtime_stage": runtime_stage.value,
-            "resolve_table": _resolve_table,
-        }
-        if this_model:
-            render_kwargs["this_model"] = this_model
-            jinja_env_kwargs["this_model"] = this_model.sql(
-                dialect=self._dialect, identify=True, comments=False
-            )
-
-        jinja_env = self._jinja_macro_registry.build_environment(**jinja_env_kwargs)
 
         expressions = [self._expression]
         if isinstance(self._expression, d.Jinja):
             try:
+                jinja_env_kwargs = {
+                    **{
+                        **render_kwargs,
+                        **_prepare_python_env_for_jinja(macro_evaluator, self._python_env),
+                        **variables,
+                    },
+                    "snapshots": snapshots or {},
+                    "table_mapping": table_mapping,
+                    "deployability_index": deployability_index,
+                    "default_catalog": self._default_catalog,
+                    "runtime_stage": runtime_stage.value,
+                    "resolve_table": _resolve_table,
+                    "raw_code": self._expression.name,
+                }
+
+                if this_model:
+                    jinja_env_kwargs["this_model"] = this_model.sql(
+                        dialect=self._dialect, identify=True, comments=False
+                    )
+
+                jinja_env = self._jinja_macro_registry.build_environment(**jinja_env_kwargs)
+
                 expressions = []
                 rendered_expression = jinja_env.from_string(self._expression.name).render()
                 logger.debug(
@@ -229,6 +231,9 @@ def _resolve_table(table: str | exp.Table) -> str:
                     f"Could not render or parse jinja at '{self._path}'.\n{ex}"
                 ) from ex
 
+        if this_model:
+            render_kwargs["this_model"] = this_model
+
         macro_evaluator.locals.update(render_kwargs)
 
         if variables:
diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py
@@ -19,6 +19,7 @@
     column_types_to_sqlmesh,
 )
 from sqlmesh.dbt.common import (
+    DBT_ALL_MODEL_ATTRS,
     DbtConfig,
     Dependencies,
     GeneralConfig,
@@ -27,6 +28,7 @@
 )
 from sqlmesh.dbt.relation import Policy, RelationType
 from sqlmesh.dbt.test import TestConfig
+from sqlmesh.dbt.util import DBT_VERSION
 from sqlmesh.utils import AttributeDict
 from sqlmesh.utils.errors import ConfigError
 from sqlmesh.utils.pydantic import field_validator
@@ -375,15 +377,23 @@ def to_sqlmesh(
     def _model_jinja_context(
         self, context: DbtContext, dependencies: Dependencies
     ) -> t.Dict[str, t.Any]:
-        model_node: AttributeDict[str, t.Any] = AttributeDict(
-            {
-                k: v
-                for k, v in context._manifest._manifest.nodes[self.node_name].to_dict().items()
-                if k in dependencies.model_attrs
-            }
-            if context._manifest and self.node_name in context._manifest._manifest.nodes
-            else {}
-        )
+        if context._manifest and self.node_name in context._manifest._manifest.nodes:
+            attributes = context._manifest._manifest.nodes[self.node_name].to_dict()
+            if DBT_ALL_MODEL_ATTRS in dependencies.model_attrs:
+                model_node: AttributeDict[str, t.Any] = AttributeDict(attributes)
+            else:
+                model_node = AttributeDict(
+                    filter(lambda kv: kv[0] in dependencies.model_attrs, attributes.items())
+                )
+
+            raw_code_key = "raw_code" if DBT_VERSION >= (1, 3, 0) else "raw_sql"  # type: ignore
+
+            # We exclude the raw SQL code to reduce the payload size. It's still accessible through
+            # the JinjaQuery instance stored in the resulting SQLMesh model's `query` field.
+            model_node.pop(raw_code_key, None)
+        else:
+            model_node = AttributeDict({})
+
         return {
             "this": self.relation_info,
             "model": model_node,
diff --git a/sqlmesh/dbt/builtin.py b/sqlmesh/dbt/builtin.py
@@ -26,6 +26,13 @@
 from sqlmesh.utils.errors import ConfigError, MacroEvalError
 from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroReference, MacroReturnVal
 
+if t.TYPE_CHECKING:
+
+    # Structural type: any object whose attributes are resolved dynamically by name.
+    class Model(t.Protocol):
+        def __getattr__(self, key: str) -> t.Any: ...
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -249,6 +256,21 @@ def source(package: str, name: str) -> t.Optional[BaseRelation]:
     return source
 
 
+def generate_model(model: AttributeDict, raw_code: str) -> Model:
+    """Return a proxy over `model` whose raw code attribute resolves to `raw_code`.
+
+    The attribute is `raw_code` (dbt >= 1.3.0) or `raw_sql`; other lookups delegate to `model`.
+    """
+    raw_code_key = "raw_code" if DBT_VERSION >= (1, 3, 0) else "raw_sql"  # type: ignore
+
+    class Model:
+        def __getattr__(self, key: str) -> t.Any:
+            # State is closed over rather than stored on `self`, so lookups can never recurse.
+            return raw_code if key == raw_code_key else getattr(model, key)
+
+    return Model()
+
+
 def return_val(val: t.Any) -> None:
     raise MacroReturnVal(val)
 
@@ -415,12 +437,16 @@ def create_builtin_globals(
             is_incremental &= snapshot_table_exists
     else:
         is_incremental = False
+
     builtin_globals["is_incremental"] = lambda: is_incremental
 
     builtin_globals["builtins"] = AttributeDict(
         {k: builtin_globals.get(k) for k in ("ref", "source", "config", "var")}
     )
 
+    if (model := jinja_globals.pop("model", None)) is not None:  # raw code is passed separately
+        builtin_globals["model"] = generate_model(model, jinja_globals.pop("raw_code", ""))
+
     if engine_adapter is not None:
         builtin_globals["flags"] = Flags(which="run")
         adapter: BaseAdapter = RuntimeAdapter(
diff --git a/sqlmesh/dbt/common.py b/sqlmesh/dbt/common.py
@@ -19,6 +19,7 @@
 T = t.TypeVar("T", bound="GeneralConfig")
 
 PROJECT_FILENAME = DBT_PROJECT_FILENAME
+DBT_ALL_MODEL_ATTRS = "__DBT_ALL_MODEL_ATTRS__"
 
 JINJA_ONLY = {
     "adapter",
diff --git a/sqlmesh/dbt/manifest.py b/sqlmesh/dbt/manifest.py
@@ -44,8 +44,8 @@
 from sqlmesh.core import constants as c
 from sqlmesh.utils.errors import SQLMeshError
 from sqlmesh.core.config import ModelDefaultsConfig
-from sqlmesh.dbt.basemodel import Dependencies
 from sqlmesh.dbt.builtin import BUILTIN_FILTERS, BUILTIN_GLOBALS, OVERRIDDEN_MACROS
+from sqlmesh.dbt.common import DBT_ALL_MODEL_ATTRS, Dependencies
 from sqlmesh.dbt.model import ModelConfig
 from sqlmesh.dbt.package import HookConfig, MacroConfig
 from sqlmesh.dbt.seed import SeedConfig
@@ -354,7 +354,9 @@ def _load_models_and_seeds(self) -> None:
                 dependencies = Dependencies(
                     macros=macro_references, refs=_refs(node), sources=_sources(node)
                 )
-                dependencies = dependencies.union(self._extra_dependencies(sql, node.package_name))
+                dependencies = dependencies.union(
+                    self._extra_dependencies(sql, node.package_name, track_all_model_attrs=True)
+                )
                 dependencies = dependencies.union(
                     self._flatten_dependencies_from_macros(dependencies.macros, node.package_name)
                 )
@@ -548,15 +550,35 @@ def _flatten_dependencies_from_macros(
             dependencies = dependencies.union(macro_dependencies)
         return dependencies
 
-    def _extra_dependencies(self, target: str, package: str) -> Dependencies:
-        # We sometimes observe that the manifest doesn't capture all macros, refs, and sources within a macro.
-        # This behavior has been observed with macros like dbt.current_timestamp(), dbt_utils.slugify(), and source().
-        # Here we apply our custom extractor to make a best effort to supplement references captured in the manifest.
+    def _extra_dependencies(
+        self,
+        target: str,
+        package: str,
+        track_all_model_attrs: bool = False,
+    ) -> Dependencies:
+        """
+        We sometimes observe that the manifest doesn't capture all macros, refs, and sources within a macro.
+        This behavior has been observed with macros like dbt.current_timestamp(), dbt_utils.slugify(), and source().
+        Here we apply our custom extractor to make a best effort to supplement references captured in the manifest.
+        """
         dependencies = Dependencies()
+
+        # Whether all `model` attributes (e.g., `model.config`) should be included in the dependencies
+        all_model_attrs = False
+
         for call_name, node in extract_call_names(target, cache=self._calls):
             if call_name[0] == "config":
                 continue
-            elif isinstance(node, jinja2.nodes.Getattr):
+
+            if (
+                track_all_model_attrs
+                and not all_model_attrs
+                and isinstance(node, jinja2.nodes.Call)
+                and any(isinstance(a, jinja2.nodes.Name) and a.name == "model" for a in node.args)
+            ):
+                all_model_attrs = True
+
+            if isinstance(node, jinja2.nodes.Getattr):
                 if call_name[0] == "model":
                     dependencies.model_attrs.add(call_name[1])
             elif call_name[0] == "source":
@@ -602,6 +624,14 @@ def _extra_dependencies(self, target: str, package: str) -> Dependencies:
                         call_name[0], call_name[1], dependencies.macros.append
                     )
 
+        # When `model` is referenced as-is, e.g. it's passed as an argument to a macro call like
+        # `{{ foo(model) }}`, we can't easily track the attributes that are actually used, because
+        # it may be aliased and hence tracking actual uses of `model` requires a proper data flow
+        # analysis. We conservatively deal with this by including all of its supported attributes
+        # if a standalone reference is found.
+        if all_model_attrs:
+            dependencies.model_attrs = {DBT_ALL_MODEL_ATTRS}
+
         return dependencies
 
 
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
@@ -1533,6 +1533,11 @@ def test_plan_enable_preview_default(sushi_context: Context, sushi_dbt_context:
     assert sushi_dbt_context._plan_preview_enabled
 
 
+def test_raw_code_missing_from_model_attributes(sushi_dbt_context: Context):
+    global_objs = sushi_dbt_context.models['"memory"."sushi"."customers"'].jinja_macros.global_objs
+    assert "raw_code" not in global_objs["model"]  # raw SQL is stripped from the payload
+
+
 def test_catalog_name_needs_to_be_quoted():
     config = Config(
         model_defaults=ModelDefaultsConfig(dialect="duckdb"),