From b08a8ce89237d4f88cba6d6992b94edb06017259 Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Thu, 28 Aug 2025 00:56:54 +0300
Subject: [PATCH 1/2] Chore!: add migration script to warn about dbt `data_type`-related diffs

---
 .../v0092_warn_about_dbt_data_type_diff.py | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py

diff --git a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
new file mode 100644
index 0000000000..48b6d0b9c2
--- /dev/null
+++ b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
@@ -0,0 +1,43 @@
+"""
+Warns dbt users about potential diffs due to corrected data_type handling.
+
+SQLMesh previously treated dbt's schema.yml data_type field as columns_to_types, which
+doesn't match dbt's behavior. dbt only uses data_type for contracts/validation, not DDL.
+This fix may cause diffs if tables were created with incorrect types.
+
+More context: https://github.com/TobikoData/sqlmesh/pull/5231
+"""
+
+import json
+
+from sqlglot import exp
+
+from sqlmesh.core.console import get_console
+
+
+def migrate(state_sync, **kwargs):  # type: ignore
+    engine_adapter = state_sync.engine_adapter
+    schema = state_sync.schema
+    snapshots_table = "_snapshots"
+    if schema:
+        snapshots_table = f"{schema}.{snapshots_table}"
+
+    warning = (
+        "IMPORTANT: this migration may have unexpected side-effects for dbt projects.\n\n"
+        "SQLMesh previously misinterpreted dbt's schema.yml 'data_type' field as actual "
+        "column types, but dbt only uses these for contracts/validation, not in actual DDL statements. This "
+        "has been fixed to match dbt's actual behavior. Your existing tables may have been created with "
+        "incorrect column types. After this migration, run 'sqlmesh diff prod' to check for column "
+        "type differences, and if any are found, apply a plan to correct the table schemas. "
+        "For more details, see: https://github.com/TobikoData/sqlmesh/pull/5231."
+    )
+
+    for (snapshot,) in engine_adapter.fetchall(
+        exp.select("snapshot").from_(snapshots_table), quote_identifiers=True
+    ):
+        parsed_snapshot = json.loads(snapshot)
+        node = parsed_snapshot["node"]
+
+        if node.get("columns"):
+            get_console().log_warning(warning)
+            return

From 6583537dd7070a0ed34dd9468624f97e6a575fb7 Mon Sep 17 00:00:00 2001
From: George Sittas
Date: Thu, 28 Aug 2025 01:11:09 +0300
Subject: [PATCH 2/2] Improve warn heuristic

---
 .../v0092_warn_about_dbt_data_type_diff.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
index 48b6d0b9c2..08ff1b1de2 100644
--- a/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
+++ b/sqlmesh/migrations/v0092_warn_about_dbt_data_type_diff.py
@@ -14,6 +14,8 @@
 
 from sqlmesh.core.console import get_console
 
+SQLMESH_DBT_PACKAGE = "sqlmesh.dbt"
+
 
 def migrate(state_sync, **kwargs):  # type: ignore
     engine_adapter = state_sync.engine_adapter
@@ -23,13 +25,13 @@ def migrate(state_sync, **kwargs):  # type: ignore
         snapshots_table = f"{schema}.{snapshots_table}"
 
     warning = (
-        "IMPORTANT: this migration may have unexpected side-effects for dbt projects.\n\n"
         "SQLMesh previously misinterpreted dbt's schema.yml 'data_type' field as actual "
-        "column types, but dbt only uses these for contracts/validation, not in actual DDL statements. This "
-        "has been fixed to match dbt's actual behavior. Your existing tables may have been created with "
-        "incorrect column types. After this migration, run 'sqlmesh diff prod' to check for column "
-        "type differences, and if any are found, apply a plan to correct the table schemas. "
-        "For more details, see: https://github.com/TobikoData/sqlmesh/pull/5231."
+        "column types, but dbt only uses these for contracts/validation, not in actual "
+        "DDL statements. This has been fixed to match dbt's actual behavior. Your existing "
+        "tables may have been created with incorrect column types. After this migration, run "
+        "'sqlmesh diff prod' to check for column type differences, and if any are found, "
+        "apply a plan to correct the table schemas. For more details, see: "
+        "https://github.com/TobikoData/sqlmesh/pull/5231."
     )
 
     for (snapshot,) in engine_adapter.fetchall(
@@ -38,6 +40,9 @@ def migrate(state_sync, **kwargs):  # type: ignore
         parsed_snapshot = json.loads(snapshot)
         node = parsed_snapshot["node"]
 
-        if node.get("columns"):
+        jinja_macros = node.get("jinja_macros") or {}
+        create_builtins_module = jinja_macros.get("create_builtins_module") or ""
+
+        if create_builtins_module == SQLMESH_DBT_PACKAGE and node.get("columns"):
             get_console().log_warning(warning)
             return