Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion sqlmesh/dbt/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ def to_sqlmesh(
kwargs["columns"] = new_columns

# dbt treats single whitespace as a null value
csv_settings = CsvSettings(na_values=[" "], keep_default_na=True)
csv_settings = CsvSettings(
delimiter=self.delimiter,
na_values=[" "],
keep_default_na=True,
)

return create_seed_model(
self.canonical_name(context),
Expand Down
21 changes: 20 additions & 1 deletion tests/dbt/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_source_config(sushi_test_project: Project):
@pytest.mark.slow
def test_seed_config(sushi_test_project: Project, mocker: MockerFixture):
seed_configs = sushi_test_project.packages["sushi"].seeds
assert set(seed_configs) == {"waiter_names"}
assert set(seed_configs) == {"waiter_names", "waiter_revenue_semicolon"}
raw_items_seed = seed_configs["waiter_names"]

expected_config = {
Expand All @@ -465,6 +465,25 @@ def test_seed_config(sushi_test_project: Project, mocker: MockerFixture):
== '"MEMORY"."SUSHI"."WAITER_NAMES"'
)

waiter_revenue_semicolon_seed = seed_configs["waiter_revenue_semicolon"]

expected_config_semicolon = {
"path": Path(sushi_test_project.context.project_root, "seeds/waiter_revenue_semicolon.csv"),
"schema_": "sushi",
"delimiter": ";",
}
actual_config_semicolon = {
k: getattr(waiter_revenue_semicolon_seed, k) for k, v in expected_config_semicolon.items()
}
assert actual_config_semicolon == expected_config_semicolon

assert waiter_revenue_semicolon_seed.canonical_name(context) == "sushi.waiter_revenue_semicolon"
assert (
waiter_revenue_semicolon_seed.to_sqlmesh(context).name == "sushi.waiter_revenue_semicolon"
)
assert waiter_revenue_semicolon_seed.delimiter == ";"
assert set(waiter_revenue_semicolon_seed.columns.keys()) == {"waiter_id", "revenue", "quarter"}


def test_quoting():
model = ModelConfig(alias="bar", schema="foo")
Expand Down
91 changes: 91 additions & 0 deletions tests/dbt/test_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,58 @@ def test_seed_partial_column_inference(tmp_path):
assert list(seed_df.columns) == list(sqlmesh_seed.columns_to_types.keys())


def test_seed_delimiter(tmp_path):
    """Check that a SeedConfig's ``delimiter`` is honoured when converting to a SQLMesh seed.

    Two CSV files are written into ``tmp_path``, one pipe-delimited and one
    semicolon-delimited. Each is converted with ``to_sqlmesh`` and the
    resulting column names and rendered values are verified.
    """
    seed_csv = tmp_path / "seed_with_delimiter.csv"

    # Write the whole payload in one call; ``writelines`` on a single string
    # would iterate over it character by character.
    seed_csv.write_text(
        "\n".join(["id|name|city", "0|Ayrton|SP", "1|Max|MC", "2|Niki|VIE"]),
        encoding="utf-8",
    )

    seed = SeedConfig(
        name="test_model_pipe",
        package="package",
        path=Path(seed_csv),
        delimiter="|",
    )

    context = DbtContext()
    context.project_name = "TestProject"
    context.target = DuckDbConfig(name="target", schema="test")
    sqlmesh_seed = seed.to_sqlmesh(context)

    # Verify columns are correct with the custom pipe (|) delimiter
    expected_columns = {"id", "name", "city"}
    assert set(sqlmesh_seed.columns_to_types.keys()) == expected_columns

    seed_df = next(sqlmesh_seed.render_seed())
    assert list(seed_df.columns) == list(sqlmesh_seed.columns_to_types.keys())
    assert len(seed_df) == 3

    assert seed_df.iloc[0]["name"] == "Ayrton"
    assert seed_df.iloc[0]["city"] == "SP"
    assert seed_df.iloc[1]["name"] == "Max"
    assert seed_df.iloc[1]["city"] == "MC"

    # Repeat with a semicolon delimiter, reusing the same context
    seed_csv_semicolon = tmp_path / "seed_with_semicolon.csv"
    seed_csv_semicolon.write_text(
        "\n".join(["id;value;status", "1;100;active", "2;200;inactive"]),
        encoding="utf-8",
    )

    seed_semicolon = SeedConfig(
        name="test_model_semicolon",
        package="package",
        path=Path(seed_csv_semicolon),
        delimiter=";",
    )

    sqlmesh_seed_semicolon = seed_semicolon.to_sqlmesh(context)
    expected_columns_semicolon = {"id", "value", "status"}
    assert set(sqlmesh_seed_semicolon.columns_to_types.keys()) == expected_columns_semicolon

    seed_df_semicolon = next(sqlmesh_seed_semicolon.render_seed())
    assert seed_df_semicolon.iloc[0]["value"] == 100
    assert seed_df_semicolon.iloc[0]["status"] == "active"


def test_seed_column_order(tmp_path):
seed_csv = tmp_path / "seed.csv"

Expand Down Expand Up @@ -910,6 +962,45 @@ def test_hooks(sushi_test_dbt_context: Context, model_fqn: str):
assert "post-hook" in mock_logger.call_args[0][0]


@pytest.mark.xdist_group("dbt_manifest")
def test_seed_delimiter_integration(sushi_test_dbt_context: Context):
    """Verify the semicolon-delimited seed loads with the expected columns, types and rows."""
    fqn = '"memory"."sushi"."waiter_revenue_semicolon"'
    assert fqn in sushi_test_dbt_context.models

    model = sushi_test_dbt_context.models[fqn]
    assert model.columns_to_types is not None

    # The file must be parsed with the semicolon delimiter; otherwise it
    # would result in a single-column table.
    assert set(model.columns_to_types.keys()) == {"waiter_id", "revenue", "quarter"}

    # The column types must match as well, not just the column names.
    assert model.columns_to_types == {
        "waiter_id": exp.DataType.build("int"),
        "revenue": exp.DataType.build("double"),
        "quarter": exp.DataType.build("text"),
    }

    df = sushi_test_dbt_context.fetchdf(f"SELECT * FROM {fqn}")

    assert len(df) == 6
    assert set(df["waiter_id"].tolist()) == {1, 2, 3}
    assert set(df["quarter"].tolist()) == {"Q1", "Q2"}

    # Spot-check individual (waiter, quarter) rows against the fixture values.
    spot_checks = (
        (1, "Q1", 100.50),
        (2, "Q2", 225.50),
        (3, "Q2", 175.75),
    )
    for waiter_id, quarter, revenue in spot_checks:
        matching = df[(df["waiter_id"] == waiter_id) & (df["quarter"] == quarter)]
        assert len(matching) == 1
        assert float(matching.iloc[0]["revenue"]) == revenue


@pytest.mark.xdist_group("dbt_manifest")
def test_target_jinja(sushi_test_project: Project):
context = sushi_test_project.context
Expand Down
10 changes: 10 additions & 0 deletions tests/fixtures/dbt/sushi_test/seeds/properties.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,13 @@ version: 2

seeds:
- name: waiter_names
- name: waiter_revenue_semicolon
config:
delimiter: ";"
columns:
- name: waiter_id
data_type: int
- name: revenue
data_type: decimal
- name: quarter
data_type: text
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
waiter_id;revenue;quarter
1;100.50;Q1
2;200.75;Q1
3;150.25;Q1
1;125.00;Q2
2;225.50;Q2
3;175.75;Q2