Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/sushi_dbt/models/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ models:
field: waiter_id
- name: revenue
description: Revenue from orders served by this waiter
- name: unused_column
data_type: int
- name: waiters
columns:
- name: waiter_id
Expand Down
3 changes: 2 additions & 1 deletion examples/sushi_dbt/models/top_waiters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

SELECT
waiter_id::INT AS waiter_id,
revenue::DOUBLE AS revenue
revenue::DOUBLE AS revenue,
1 AS unused_column
FROM {{ ref('waiter_revenue_by_day', version=1) }}
WHERE
ds = (
Expand Down
24 changes: 19 additions & 5 deletions sqlmesh/dbt/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,12 +328,9 @@ def sqlmesh_model_kwargs(
dependencies.macros, package=self.package_name
)
jinja_macros.add_globals(self._model_jinja_context(model_context, dependencies))
return {

model_kwargs = {
"audits": [(test.name, {}) for test in self.tests],
"columns": column_types_to_sqlmesh(
column_types_override or self.columns, self.dialect(context)
)
or None,
"column_descriptions": column_descriptions_to_sqlmesh(self.columns) or None,
"depends_on": {
model.canonical_name(context) for model in model_context.refs.values()
Expand All @@ -349,6 +346,23 @@ def sqlmesh_model_kwargs(
**self.sqlmesh_config_kwargs,
}

# dbt doesn't respect the data_type field for DDL statements; instead, it optionally uses
# it to validate the actual data types at runtime through contracts or external plugins.
# Only the `column_types` config of seed models is actually respected. In all other cases
# we intentionally don't set the columns attribute to self.columns, as that could result
# in unfaithful types when models are materialized.
#
# See:
# - https://docs.getdbt.com/reference/resource-properties/columns
# - https://docs.getdbt.com/reference/resource-configs/contract
# - https://docs.getdbt.com/reference/resource-configs/column_types
if column_types_override:
model_kwargs["columns"] = (
column_types_to_sqlmesh(column_types_override, self.dialect(context)) or None
)

return model_kwargs

@abstractmethod
def to_sqlmesh(
self,
Expand Down
15 changes: 5 additions & 10 deletions sqlmesh/dbt/seed.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import copy
import typing as t

import agate
Expand Down Expand Up @@ -50,15 +49,11 @@ def to_sqlmesh(
"""Converts the dbt seed into a SQLMesh model."""
seed_path = self.path.absolute().as_posix()

if column_types := self.column_types:
column_types_override = copy.deepcopy(self.columns)
for name, data_type in column_types.items():
column = column_types_override.setdefault(name, ColumnConfig(name=name))
column.data_type = data_type
column.quote = self.quote_columns or column.quote
kwargs = self.sqlmesh_model_kwargs(context, column_types_override)
else:
kwargs = self.sqlmesh_model_kwargs(context)
column_types_override = {
name: ColumnConfig(name=name, data_type=data_type, quote=self.quote_columns)
for name, data_type in (self.column_types or {}).items()
}
kwargs = self.sqlmesh_model_kwargs(context, column_types_override)

columns = kwargs.get("columns") or {}

Expand Down
14 changes: 14 additions & 0 deletions tests/dbt/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@
from dbt.adapters.base import BaseRelation, Column
from pytest_mock import MockerFixture

from sqlglot import exp
from sqlmesh.core.audit import StandaloneAudit
from sqlmesh.core.config import Config, ModelDefaultsConfig
from sqlmesh.core.dialect import jinja_query
from sqlmesh.core.model import SqlModel
from sqlmesh.core.model.kind import OnDestructiveChange, OnAdditiveChange
from sqlmesh.dbt.column import ColumnConfig
from sqlmesh.dbt.common import Dependencies
from sqlmesh.dbt.context import DbtContext
from sqlmesh.dbt.loader import sqlmesh_config
Expand Down Expand Up @@ -1076,3 +1078,15 @@ def test_on_schema_change_properties(

assert model.on_additive_change == expected_additive
assert model.on_destructive_change == expected_destructive


def test_sqlmesh_model_kwargs_columns_override():
    """An explicit column types override is converted into the `columns` model kwarg."""
    context = DbtContext()
    context.project_name = "Foo"
    context.target = DuckDbConfig(name="target", schema="foo")

    # A single column declared with a DuckDB-specific type name.
    override = {"c": ColumnConfig(name="c", data_type="uinteger")}
    model_config = ModelConfig(dialect="duckdb")

    kwargs = model_config.sqlmesh_model_kwargs(context, override)

    expected_columns = {"c": exp.DataType.build(exp.DataType.Type.UINT)}
    assert kwargs.get("columns") == expected_columns
31 changes: 29 additions & 2 deletions tests/dbt/test_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,10 @@ def test_model_columns():
name="target", schema="test", database="test", account="foo", user="bar", password="baz"
)
sqlmesh_model = model.to_sqlmesh(context)
assert sqlmesh_model.columns_to_types == expected_column_types

# Column data types declared in a schema.yml are not respected in DDLs, so, to match
# dbt's behavior, SQLMesh doesn't set the corresponding columns_to_types_ attribute either
assert sqlmesh_model.columns_to_types == None
assert sqlmesh_model.column_descriptions == expected_column_descriptions


Expand All @@ -623,8 +626,11 @@ def test_seed_columns():
},
)

# dbt doesn't respect the data_type field in the DDLs; instead, it optionally uses it to
# validate the actual data types at runtime through contracts or external plugins. Thus,
# the actual data type is int, because that is what is inferred from the seed file.
expected_column_types = {
"id": exp.DataType.build("text"),
"id": exp.DataType.build("int"),
"name": exp.DataType.build("text"),
}
expected_column_descriptions = {
Expand Down Expand Up @@ -671,6 +677,27 @@ def test_seed_column_types():
assert sqlmesh_seed.columns_to_types == expected_column_types
assert sqlmesh_seed.column_descriptions == expected_column_descriptions

seed = SeedConfig(
name="foo",
package="package",
path=Path("examples/sushi_dbt/seeds/waiter_names.csv"),
column_types={
"name": "text",
},
columns={
# The `data_type` field does not affect the materialized seed's column type
"id": ColumnConfig(name="name", data_type="text"),
},
quote_columns=True,
)

expected_column_types = {
"id": exp.DataType.build("int"),
"name": exp.DataType.build("text"),
}
sqlmesh_seed = seed.to_sqlmesh(context)
assert sqlmesh_seed.columns_to_types == expected_column_types


def test_seed_column_inference(tmp_path):
seed_csv = tmp_path / "seed.csv"
Expand Down