Skip to content

Commit 5b121e4

Browse files
Merge branch 'main' into themis/dbttrace
2 parents dad35a8 + db58405 commit 5b121e4

File tree

8 files changed

+100
-20
lines changed

8 files changed

+100
-20
lines changed

examples/sushi_dbt/models/schema.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ models:
3636
field: waiter_id
3737
- name: revenue
3838
description: Revenue from orders served by this waiter
39+
- name: unused_column
40+
data_type: int
3941
- name: waiters
4042
columns:
4143
- name: waiter_id

examples/sushi_dbt/models/top_waiters.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
SELECT
88
waiter_id::INT AS waiter_id,
9-
revenue::DOUBLE AS revenue
9+
revenue::DOUBLE AS revenue,
10+
1 AS unused_column
1011
FROM {{ ref('waiter_revenue_by_day', version=1) }}
1112
WHERE
1213
ds = (

sqlmesh/dbt/basemodel.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,9 @@ def sqlmesh_model_kwargs(
328328
dependencies.macros, package=self.package_name
329329
)
330330
jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies))
331-
return {
331+
332+
model_kwargs = {
332333
"audits": [(test.name, {}) for test in self.tests],
333-
"columns": column_types_to_sqlmesh(
334-
column_types_override or self.columns, self.dialect(context)
335-
)
336-
or None,
337334
"column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None,
338335
"depends_on": {
339336
model.canonical_name(context) for model in model_context.refs.values()
@@ -349,6 +346,23 @@ def sqlmesh_model_kwargs(
349346
**self.sqlmesh_config_kwargs,
350347
}
351348

349+
# dbt doesn't respect the data_type field for DDL statements — instead, it optionally uses
350+
# it to validate the actual data types at runtime through contracts or external plugins.
351+
# Only the `column_types` config of seed models is actually respected. We don't set the
352+
# columns attribute to self.columns intentionally in all other cases, as that could result
353+
# in unfaithful types when models are materialized.
354+
#
355+
# See:
356+
# - https://docs.getdbt.com/reference/resource-properties/columns
357+
# - https://docs.getdbt.com/reference/resource-configs/contract
358+
# - https://docs.getdbt.com/reference/resource-configs/column_types
359+
if column_types_override:
360+
model_kwargs["columns"] = (
361+
column_types_to_sqlmesh(column_types_override, self.dialect(context)) or None
362+
)
363+
364+
return model_kwargs
365+
352366
@abstractmethod
353367
def to_sqlmesh(
354368
self,

sqlmesh/dbt/seed.py

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from __future__ import annotations
22

3-
import copy
43
import typing as t
54

65
import agate
@@ -50,15 +49,11 @@ def to_sqlmesh(
5049
"""Converts the dbt seed into a SQLMesh model."""
5150
seed_path = self.path.absolute().as_posix()
5251

53-
if column_types := self.column_types:
54-
column_types_override = copy.deepcopy(self.columns)
55-
for name, data_type in column_types.items():
56-
column = column_types_override.setdefault(name, ColumnConfig(name=name))
57-
column.data_type = data_type
58-
column.quote = self.quote_columns or column.quote
59-
kwargs = self.sqlmesh_model_kwargs(context, column_types_override)
60-
else:
61-
kwargs = self.sqlmesh_model_kwargs(context)
52+
column_types_override = {
53+
name: ColumnConfig(name=name, data_type=data_type, quote=self.quote_columns)
54+
for name, data_type in (self.column_types or {}).items()
55+
}
56+
kwargs = self.sqlmesh_model_kwargs(context, column_types_override)
6257

6358
columns = kwargs.get("columns") or {}
6459

sqlmesh/utils/jinja.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,14 @@ def _extract(node: nodes.Node, parent: t.Optional[nodes.Node] = None) -> None:
200200
return extracted
201201

202202

203+
def is_variable_node(n: nodes.Node) -> bool:
204+
return (
205+
isinstance(n, nodes.Call)
206+
and isinstance(n.node, nodes.Name)
207+
and n.node.name in (c.VAR, c.BLUEPRINT_VAR)
208+
)
209+
210+
203211
def extract_macro_references_and_variables(
204212
*jinja_strs: str, dbt_target_name: t.Optional[str] = None
205213
) -> t.Tuple[t.Set[MacroReference], t.Set[str]]:
@@ -230,7 +238,15 @@ def extract_macro_references_and_variables(
230238

231239
for call_name, node in extract_call_names(jinja_str):
232240
if call_name[0] in (c.VAR, c.BLUEPRINT_VAR):
233-
assert isinstance(node, nodes.Call)
241+
if not is_variable_node(node):
242+
# Find the variable node which could be nested
243+
for n in node.find_all(nodes.Call):
244+
if is_variable_node(n):
245+
node = n
246+
break
247+
else:
248+
raise ValueError(f"Could not find variable name in {jinja_str}")
249+
node = t.cast(nodes.Call, node)
234250
args = [jinja_call_arg_name(arg) for arg in node.args]
235251
if args and args[0]:
236252
variable_name = args[0].lower()

tests/dbt/converter/test_jinja.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import pytest
2-
from sqlmesh.utils.jinja import JinjaMacroRegistry, MacroExtractor
2+
from sqlmesh.utils.jinja import (
3+
JinjaMacroRegistry,
4+
MacroExtractor,
5+
extract_macro_references_and_variables,
6+
)
37
from sqlmesh.dbt.converter.jinja import JinjaGenerator, convert_jinja_query, convert_jinja_macro
48
import sqlmesh.dbt.converter.jinja_transforms as jt
59
from pathlib import Path
@@ -437,3 +441,10 @@ def test_convert_jinja_macro(input: str, expected: str, sushi_dbt_context: Conte
437441
result = convert_jinja_macro(sushi_dbt_context, input.strip())
438442

439443
assert " ".join(result.split()) == " ".join(expected.strip().split())
444+
445+
446+
def test_extract_macro_references_and_variables() -> None:
447+
input = """JINJA_QUERY('{%- set something = "'"~var("variable").split("|") -%}"""
448+
_, variables = extract_macro_references_and_variables(input)
449+
assert len(variables) == 1
450+
assert variables == {"variable"}

tests/dbt/test_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,13 @@
77
from dbt.adapters.base import BaseRelation, Column
88
from pytest_mock import MockerFixture
99

10+
from sqlglot import exp
1011
from sqlmesh.core.audit import StandaloneAudit
1112
from sqlmesh.core.config import Config, ModelDefaultsConfig
1213
from sqlmesh.core.dialect import jinja_query
1314
from sqlmesh.core.model import SqlModel
1415
from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange
16+
from sqlmesh.dbt.column import ColumnConfig
1517
from sqlmesh.dbt.common import Dependencies
1618
from sqlmesh.dbt.context import DbtContext
1719
from sqlmesh.dbt.loader import sqlmesh_config
@@ -1076,3 +1078,15 @@ def test_on_schema_change_properties(
10761078

10771079
assert model.on_additive_change == expected_additive
10781080
assert model.on_destructive_change == expected_destructive
1081+
1082+
1083+
def test_sqlmesh_model_kwargs_columns_override():
1084+
context = DbtContext()
1085+
context.project_name = "Foo"
1086+
context.target = DuckDbConfig(name="target", schema="foo")
1087+
1088+
kwargs = ModelConfig(dialect="duckdb").sqlmesh_model_kwargs(
1089+
context,
1090+
{"c": ColumnConfig(name="c", data_type="uinteger")},
1091+
)
1092+
assert kwargs.get("columns") == {"c": exp.DataType.build(exp.DataType.Type.UINT)}

tests/dbt/test_transformation.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,10 @@ def test_model_columns():
608608
name="target", schema="test", database="test", account="foo", user="bar", password="baz"
609609
)
610610
sqlmesh_model = model.to_sqlmesh(context)
611-
assert sqlmesh_model.columns_to_types == expected_column_types
611+
612+
# Columns being present in a schema.yml are not respected in DDLs, so SQLMesh doesn't
613+
# set the corresponding columns_to_types_ attribute either to match dbt's behavior
614+
assert sqlmesh_model.columns_to_types == None
612615
assert sqlmesh_model.column_descriptions == expected_column_descriptions
613616

614617

@@ -623,8 +626,11 @@ def test_seed_columns():
623626
},
624627
)
625628

629+
# dbt doesn't respect the data_type field in the DDLs — instead, it optionally uses it to
630+
# validate the actual data types at runtime through contracts or external plugins. Thus,
631+
# the actual data type is int, because that is what is inferred from the seed file.
626632
expected_column_types = {
627-
"id": exp.DataType.build("text"),
633+
"id": exp.DataType.build("int"),
628634
"name": exp.DataType.build("text"),
629635
}
630636
expected_column_descriptions = {
@@ -671,6 +677,27 @@ def test_seed_column_types():
671677
assert sqlmesh_seed.columns_to_types == expected_column_types
672678
assert sqlmesh_seed.column_descriptions == expected_column_descriptions
673679

680+
seed = SeedConfig(
681+
name="foo",
682+
package="package",
683+
path=Path("examples/sushi_dbt/seeds/waiter_names.csv"),
684+
column_types={
685+
"name": "text",
686+
},
687+
columns={
688+
# The `data_type` field does not affect the materialized seed's column type
689+
"id": ColumnConfig(name="name", data_type="text"),
690+
},
691+
quote_columns=True,
692+
)
693+
694+
expected_column_types = {
695+
"id": exp.DataType.build("int"),
696+
"name": exp.DataType.build("text"),
697+
}
698+
sqlmesh_seed = seed.to_sqlmesh(context)
699+
assert sqlmesh_seed.columns_to_types == expected_column_types
700+
674701

675702
def test_seed_column_inference(tmp_path):
676703
seed_csv = tmp_path / "seed.csv"

0 commit comments

Comments
 (0)