Skip to content

Commit ffe7a04

Browse files
committed
Fix: stop treating dbt schema data types as columns_to_types
1 parent 254e9e1 commit ffe7a04

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

examples/sushi_dbt/models/schema.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ models:
3636
field: waiter_id
3737
- name: revenue
3838
description: Revenue from orders served by this waiter
39+
- name: unused_column
40+
data_type: int
3941
- name: waiters
4042
columns:
4143
- name: waiter_id

examples/sushi_dbt/models/top_waiters.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
SELECT
88
waiter_id::INT AS waiter_id,
9-
revenue::DOUBLE AS revenue
9+
revenue::DOUBLE AS revenue,
10+
1 AS unused_column
1011
FROM {{ ref('waiter_revenue_by_day', version=1) }}
1112
WHERE
1213
ds = (

sqlmesh/dbt/basemodel.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,9 @@ def sqlmesh_model_kwargs(
328328
dependencies.macros, package=self.package_name
329329
)
330330
jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies))
331-
return {
331+
332+
model_kwargs = {
332333
"audits": [(test.name, {}) for test in self.tests],
333-
"columns": column_types_to_sqlmesh(
334-
column_types_override or self.columns, self.dialect(context)
335-
)
336-
or None,
337334
"column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None,
338335
"depends_on": {
339336
model.canonical_name(context) for model in model_context.refs.values()
@@ -349,6 +346,21 @@ def sqlmesh_model_kwargs(
349346
**self.sqlmesh_config_kwargs,
350347
}
351348

349+
# dbt doesn't respect the data_type field for DDL statements; instead, it optionally uses
350+
# it to validate the actual data types at runtime through contracts or external plugins.
351+
# Only the `column_types` config of seed models is actually respected. We don't set the
352+
# columns attribute to self.columns intentionally in all other cases, as that could result
353+
# in unfaithful types when models are materialized.
354+
#
355+
# See:
356+
# - https://docs.getdbt.com/reference/resource-properties/columns
357+
# - https://docs.getdbt.com/reference/resource-configs/contract
358+
# - https://docs.getdbt.com/reference/resource-configs/column_types
359+
if column_types_override:
360+
model_kwargs["columns"] = column_types_to_sqlmesh(column_types_override)
361+
362+
return model_kwargs
363+
352364
@abstractmethod
353365
def to_sqlmesh(
354366
self,

tests/dbt/test_transformation.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,10 @@ def test_model_columns():
608608
name="target", schema="test", database="test", account="foo", user="bar", password="baz"
609609
)
610610
sqlmesh_model = model.to_sqlmesh(context)
611-
assert sqlmesh_model.columns_to_types == expected_column_types
611+
612+
# Column data types declared in a schema.yaml are not respected in DDLs, so SQLMesh doesn't
613+
# set the corresponding columns_to_types_ attribute either to match dbt's behavior
614+
assert sqlmesh_model.columns_to_types == None
612615
assert sqlmesh_model.column_descriptions == expected_column_descriptions
613616

614617

@@ -623,8 +626,11 @@ def test_seed_columns():
623626
},
624627
)
625628

629+
# dbt doesn't respect the data_type field in the DDLs; instead, it optionally uses it to
630+
# validate the actual data types at runtime through contracts or external plugins. Thus,
631+
# the actual data type is int, because that is what is inferred from the seed file.
626632
expected_column_types = {
627-
"id": exp.DataType.build("text"),
633+
"id": exp.DataType.build("int"),
628634
"name": exp.DataType.build("text"),
629635
}
630636
expected_column_descriptions = {

0 commit comments

Comments
 (0)