diff --git a/examples/sushi_dbt/models/schema.yml b/examples/sushi_dbt/models/schema.yml index 6dd6484b08..d42d64bcce 100644 --- a/examples/sushi_dbt/models/schema.yml +++ b/examples/sushi_dbt/models/schema.yml @@ -36,6 +36,8 @@ models: field: waiter_id - name: revenue description: Revenue from orders served by this waiter + - name: unused_column + data_type: int - name: waiters columns: - name: waiter_id diff --git a/examples/sushi_dbt/models/top_waiters.sql b/examples/sushi_dbt/models/top_waiters.sql index f839b31dc2..e4a74fd8b3 100644 --- a/examples/sushi_dbt/models/top_waiters.sql +++ b/examples/sushi_dbt/models/top_waiters.sql @@ -6,7 +6,8 @@ SELECT waiter_id::INT AS waiter_id, - revenue::DOUBLE AS revenue + revenue::DOUBLE AS revenue, + 1 AS unused_column FROM {{ ref('waiter_revenue_by_day', version=1) }} WHERE ds = ( diff --git a/sqlmesh/dbt/basemodel.py b/sqlmesh/dbt/basemodel.py index f1e1dbed03..fad86f618e 100644 --- a/sqlmesh/dbt/basemodel.py +++ b/sqlmesh/dbt/basemodel.py @@ -328,12 +328,9 @@ def sqlmesh_model_kwargs( dependencies.macros, package=self.package_name ) jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies)) - return { + + model_kwargs = { "audits": [(test.name, {}) for test in self.tests], - "columns": column_types_to_sqlmesh( - column_types_override or self.columns, self.dialect(context) - ) - or None, "column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None, "depends_on": { model.canonical_name(context) for model in model_context.refs.values() @@ -349,6 +346,23 @@ def sqlmesh_model_kwargs( **self.sqlmesh_config_kwargs, } + # dbt doesn't respect the data_type field for DDL statements; instead, it optionally uses + # it to validate the actual data types at runtime through contracts or external plugins. + # Only the `column_types` config of seed models is actually respected.
We don't set the + # columns attribute to self.columns intentionally in all other cases, as that could result + # in unfaithful types when models are materialized. + # + # See: + # - https://docs.getdbt.com/reference/resource-properties/columns + # - https://docs.getdbt.com/reference/resource-configs/contract + # - https://docs.getdbt.com/reference/resource-configs/column_types + if column_types_override: + model_kwargs["columns"] = ( + column_types_to_sqlmesh(column_types_override, self.dialect(context)) or None + ) + + return model_kwargs + @abstractmethod def to_sqlmesh( self, diff --git a/sqlmesh/dbt/seed.py b/sqlmesh/dbt/seed.py index cf22d961cf..08e89ee584 100644 --- a/sqlmesh/dbt/seed.py +++ b/sqlmesh/dbt/seed.py @@ -1,6 +1,5 @@ from __future__ import annotations -import copy import typing as t import agate @@ -50,15 +49,11 @@ def to_sqlmesh( """Converts the dbt seed into a SQLMesh model.""" seed_path = self.path.absolute().as_posix() - if column_types := self.column_types: - column_types_override = copy.deepcopy(self.columns) - for name, data_type in column_types.items(): - column = column_types_override.setdefault(name, ColumnConfig(name=name)) - column.data_type = data_type - column.quote = self.quote_columns or column.quote - kwargs = self.sqlmesh_model_kwargs(context, column_types_override) - else: - kwargs = self.sqlmesh_model_kwargs(context) + column_types_override = { + name: ColumnConfig(name=name, data_type=data_type, quote=self.quote_columns) + for name, data_type in (self.column_types or {}).items() + } + kwargs = self.sqlmesh_model_kwargs(context, column_types_override) columns = kwargs.get("columns") or {} diff --git a/tests/dbt/test_config.py b/tests/dbt/test_config.py index 1483225987..852ad02d5e 100644 --- a/tests/dbt/test_config.py +++ b/tests/dbt/test_config.py @@ -7,11 +7,13 @@ from dbt.adapters.base import BaseRelation, Column from pytest_mock import MockerFixture +from sqlglot import exp from sqlmesh.core.audit import StandaloneAudit 
from sqlmesh.core.config import Config, ModelDefaultsConfig from sqlmesh.core.dialect import jinja_query from sqlmesh.core.model import SqlModel from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange +from sqlmesh.dbt.column import ColumnConfig from sqlmesh.dbt.common import Dependencies from sqlmesh.dbt.context import DbtContext from sqlmesh.dbt.loader import sqlmesh_config @@ -1076,3 +1078,15 @@ def test_on_schema_change_properties( assert model.on_additive_change == expected_additive assert model.on_destructive_change == expected_destructive + + +def test_sqlmesh_model_kwargs_columns_override(): + context = DbtContext() + context.project_name = "Foo" + context.target = DuckDbConfig(name="target", schema="foo") + + kwargs = ModelConfig(dialect="duckdb").sqlmesh_model_kwargs( + context, + {"c": ColumnConfig(name="c", data_type="uinteger")}, + ) + assert kwargs.get("columns") == {"c": exp.DataType.build(exp.DataType.Type.UINT)} diff --git a/tests/dbt/test_transformation.py b/tests/dbt/test_transformation.py index c976e56744..779160c27d 100644 --- a/tests/dbt/test_transformation.py +++ b/tests/dbt/test_transformation.py @@ -608,7 +608,10 @@ def test_model_columns(): name="target", schema="test", database="test", account="foo", user="bar", password="baz" ) sqlmesh_model = model.to_sqlmesh(context) - assert sqlmesh_model.columns_to_types == expected_column_types + + # Columns being present in a schema.yaml are not respected in DDLs, so SQLMesh doesn't + # set the corresponding columns_to_types_ attribute either to match dbt's behavior + assert sqlmesh_model.columns_to_types is None assert sqlmesh_model.column_descriptions == expected_column_descriptions @@ -623,8 +626,11 @@ def test_seed_columns(): }, ) + # dbt doesn't respect the data_type field in the DDLs; instead, it optionally uses it to + # validate the actual data types at runtime through contracts or external plugins.
Thus, + # the actual data type is int, because that is what is inferred from the seed file. expected_column_types = { - "id": exp.DataType.build("text"), + "id": exp.DataType.build("int"), "name": exp.DataType.build("text"), } expected_column_descriptions = { @@ -671,6 +677,27 @@ def test_seed_column_types(): assert sqlmesh_seed.columns_to_types == expected_column_types assert sqlmesh_seed.column_descriptions == expected_column_descriptions + seed = SeedConfig( + name="foo", + package="package", + path=Path("examples/sushi_dbt/seeds/waiter_names.csv"), + column_types={ + "name": "text", + }, + columns={ + # The `data_type` field does not affect the materialized seed's column type + "id": ColumnConfig(name="id", data_type="text"), + }, + quote_columns=True, + ) + + expected_column_types = { + "id": exp.DataType.build("int"), + "name": exp.DataType.build("text"), + } + sqlmesh_seed = seed.to_sqlmesh(context) + assert sqlmesh_seed.columns_to_types == expected_column_types + def test_seed_column_inference(tmp_path): seed_csv = tmp_path / "seed.csv"