From 20ca5f04a312e6917ae844b4c08fbb788fde5aa0 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 31 Jul 2025 03:32:00 +0000 Subject: [PATCH 1/4] Fix(athena): Properly extend Athena dialect --- sqlmesh/core/dialect.py | 9 +++++++++ tests/core/test_dialect.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/sqlmesh/core/dialect.py b/sqlmesh/core/dialect.py index 1a42480c13..f5464e12bc 100644 --- a/sqlmesh/core/dialect.py +++ b/sqlmesh/core/dialect.py @@ -13,6 +13,7 @@ from sqlglot import Dialect, Generator, ParseError, Parser, Tokenizer, TokenType, exp from sqlglot.dialects.dialect import DialectType from sqlglot.dialects import DuckDB, Snowflake +import sqlglot.dialects.athena as athena from sqlglot.helper import seq_get from sqlglot.optimizer.normalize_identifiers import normalize_identifiers from sqlglot.optimizer.qualify_columns import quote_identifiers @@ -1014,6 +1015,14 @@ def extend_sqlglot() -> None: generators = {Generator} for dialect in Dialect.classes.values(): + # Athena picks a different Tokenizer / Parser / Generator depending on the query + # so this ensures that the extra ones it defines are also extended + if dialect == athena.Athena: + tokenizers.add(athena._TrinoTokenizer) + parsers.add(athena._TrinoParser) + generators.add(athena._TrinoGenerator) + generators.add(athena._HiveGenerator) + if hasattr(dialect, "Tokenizer"): tokenizers.add(dialect.Tokenizer) if hasattr(dialect, "Parser"): diff --git a/tests/core/test_dialect.py b/tests/core/test_dialect.py index ebf90bebf7..9c897d5436 100644 --- a/tests/core/test_dialect.py +++ b/tests/core/test_dialect.py @@ -12,7 +12,9 @@ select_from_values_for_batch_range, text_diff, ) +import sqlmesh.core.dialect as d from sqlmesh.core.model import SqlModel, load_sql_based_model +from sqlmesh.core.config.connection import DIALECT_TO_TYPE def test_format_model_expressions(): @@ -700,3 +702,18 @@ def test_model_name_cannot_be_string(): def test_parse_snowflake_create_schema_ddl(): assert parse_one("CREATE SCHEMA d.s", dialect="snowflake").sql() == "CREATE SCHEMA d.s" + + +@pytest.mark.parametrize("dialect", set(DIALECT_TO_TYPE.values())) +def test_sqlglot_extended_correctly(dialect: str) -> None: + # MODEL is a SQLMesh extension and not part of SQLGlot + # If we can roundtrip an expression containing MODEL across every dialect, then the SQLMesh extensions have been registered correctly + ast = d.parse_one("MODEL (name foo)", dialect=dialect) + assert isinstance(ast, d.Model) + name_prop = ast.find(exp.Property) + assert isinstance(name_prop, exp.Property) + assert name_prop.this == "name" + value = name_prop.args["value"] + assert isinstance(value, exp.Table) + assert value.sql() == "foo" + assert ast.sql(dialect=dialect) == "MODEL (\nname foo\n)" From 8cfb7175b5db2652100e040bfa2adc8d17072c89 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 31 Jul 2025 03:34:05 +0000 Subject: [PATCH 2/4] Temporarily enable Athena tests --- .circleci/continue_config.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index b93caf482e..a75da13361 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -297,16 +297,16 @@ workflows: matrix: parameters: engine: - - snowflake - - databricks - - redshift - - bigquery - - clickhouse-cloud + #- snowflake + #- databricks + #- redshift + #- bigquery + #- clickhouse-cloud - athena - filters: - branches: - only: - - main + #filters: + # branches: + # only: + # - main - ui_style - ui_test - vscode_test From 02c5e4eb8b8a017193d7a812a00ccc62eb15c404 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 31 Jul 2025 04:09:01 +0000 Subject: [PATCH 3/4] Fix tests --- tests/core/test_dialect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/test_dialect.py b/tests/core/test_dialect.py index 9c897d5436..11ffec3720 100644 --- a/tests/core/test_dialect.py +++ b/tests/core/test_dialect.py @@ -704,7 +704,7 @@ def test_parse_snowflake_create_schema_ddl(): assert parse_one("CREATE SCHEMA d.s", dialect="snowflake").sql() == "CREATE SCHEMA d.s" -@pytest.mark.parametrize("dialect", set(DIALECT_TO_TYPE.values())) +@pytest.mark.parametrize("dialect", sorted(set(DIALECT_TO_TYPE.values()))) def test_sqlglot_extended_correctly(dialect: str) -> None: # MODEL is a SQLMesh extension and not part of SQLGlot # If we can roundtrip an expression containing MODEL across every dialect, then the SQLMesh extensions have been registered correctly From dc43d2798ab520f7a78306a4836ae3fd8bc65a2d Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Thu, 31 Jul 2025 18:28:15 +0000 Subject: [PATCH 4/4] Revert "Temporarily enable Athena tests" This reverts commit 8cfb7175b5db2652100e040bfa2adc8d17072c89. --- .circleci/continue_config.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.circleci/continue_config.yml b/.circleci/continue_config.yml index a75da13361..b93caf482e 100644 --- a/.circleci/continue_config.yml +++ b/.circleci/continue_config.yml @@ -297,16 +297,16 @@ workflows: matrix: parameters: engine: - #- snowflake - #- databricks - #- redshift - #- bigquery - #- clickhouse-cloud + - snowflake + - databricks + - redshift + - bigquery + - clickhouse-cloud - athena - #filters: - # branches: - # only: - # - main + filters: + branches: + only: + - main - ui_style - ui_test - vscode_test