diff --git a/sqlmesh/core/engine_adapter/databricks.py b/sqlmesh/core/engine_adapter/databricks.py
index 946a7bdf74..173e1b08af 100644
--- a/sqlmesh/core/engine_adapter/databricks.py
+++ b/sqlmesh/core/engine_adapter/databricks.py
@@ -34,6 +34,8 @@ class DatabricksEngineAdapter(SparkEngineAdapter):
     SUPPORTS_CLONING = True
     SUPPORTS_MATERIALIZED_VIEWS = True
     SUPPORTS_MATERIALIZED_VIEW_SCHEMA = True
+    # Spark has this set to false for compatibility when mixing with Trino but that isn't a concern with Databricks
+    QUOTE_IDENTIFIERS_IN_VIEWS = True
     SCHEMA_DIFFER_KWARGS = {
         "support_positional_add": True,
         "nested_support": NestedSupport.ALL,
diff --git a/tests/core/engine_adapter/integration/test_integration.py b/tests/core/engine_adapter/integration/test_integration.py
index 5190d26e98..995875c778 100644
--- a/tests/core/engine_adapter/integration/test_integration.py
+++ b/tests/core/engine_adapter/integration/test_integration.py
@@ -3990,3 +3990,49 @@ def _set_config(gateway: str, config: Config) -> None:
         was_evaluated=True,
         day_delta=4,
     )
+
+
+def test_unicode_characters(ctx: TestContext, tmp_path: Path):
+    """Plan a FULL model with a non-ASCII name and check the resulting view and table.
+
+    Skipped for adapters whose QUOTE_IDENTIFIERS_IN_VIEWS flag is falsy.
+    """
+    # Engines that don't quote identifiers in views are incompatible with unicode characters in model names.
+    # At the time of writing these are Spark/Trino, which skip quoting for compatibility reasons.
+    # Spark may not support unicode in general, but that would need to be verified.
+    if not ctx.engine_adapter.QUOTE_IDENTIFIERS_IN_VIEWS:
+        pytest.skip("Skipping as these engines have issues with unicode characters in model names")
+
+    model_name = "客户数据"
+    table = ctx.table(model_name).sql(dialect=ctx.dialect)
+    models_dir = tmp_path / "models"
+    models_dir.mkdir(exist_ok=True)
+
+    model_def = f"""
+    MODEL (
+        name {table},
+        kind FULL,
+        dialect '{ctx.dialect}'
+    );
+    SELECT 1 as id
+    """
+
+    # Write UTF-8 explicitly: the platform default encoding (e.g. cp1252 on Windows)
+    # cannot encode the non-ASCII model name and would raise UnicodeEncodeError.
+    (models_dir / f"{model_name}.sql").write_text(model_def, encoding="utf-8")
+
+    context = ctx.create_context(path=tmp_path)
+    context.plan(auto_apply=True, no_prompts=True)
+
+    # Exactly one view is expected, and its lower-cased name must equal the model name.
+    results = ctx.get_metadata_results()
+    assert len(results.views) == 1
+    assert results.views[0].lower() == model_name
+
+    # Repeat the lookup against the same schema with its db name prefixed by "sqlmesh__".
+    schema = d.to_schema(ctx.schema(), dialect=ctx.dialect)
+    schema_name = schema.args["db"].this
+    schema.args["db"].set("this", "sqlmesh__" + schema_name)
+    table_results = ctx.get_metadata_results(schema)
+    assert len(table_results.tables) == 1
+    assert table_results.tables[0].lower().startswith(schema_name.lower() + "________")
diff --git a/tests/core/engine_adapter/test_databricks.py b/tests/core/engine_adapter/test_databricks.py
index f482361c3c..27988fed39 100644
--- a/tests/core/engine_adapter/test_databricks.py
+++ b/tests/core/engine_adapter/test_databricks.py
@@ -195,7 +195,7 @@ def test_materialized_view_properties(mocker: MockFixture, make_mocked_engine_ad
     sql_calls = to_sql_calls(adapter)
     # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-materialized-view.html#syntax
     assert sql_calls == [
-        "CREATE OR REPLACE MATERIALIZED VIEW test_table PARTITIONED BY (ds) AS SELECT 1",
+        "CREATE OR REPLACE MATERIALIZED VIEW `test_table` PARTITIONED BY (`ds`) AS SELECT 1",
     ]
 
 