Skip to content

Commit c668eef

Browse files
authored
Fix: ignore non-key "dialect" in MODEL/AUDIT block (#5651)
1 parent d4a3acb commit c668eef

File tree

2 files changed

+160
-2
lines changed

2 files changed

+160
-2
lines changed

sqlmesh/core/dialect.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -803,8 +803,15 @@ def text_diff(
803803
return "\n".join(unified_diff(a_sql, b_sql))
804804

805805

806+
WS_OR_COMMENT = r"(?:\s|--[^\n]*\n|/\*.*?\*/)"
807+
HEADER = r"\b(?:model|audit)\b(?=\s*\()"
808+
KEY_BOUNDARY = r"(?:\(|,)" # key is preceded by either '(' or ','
809+
DIALECT_VALUE = r"['\"]?(?P<dialect>[a-z][a-z0-9]*)['\"]?"
810+
VALUE_BOUNDARY = r"(?=,|\))" # value is followed by comma or closing paren
811+
806812
DIALECT_PATTERN = re.compile(
807-
r"(model|audit).*?\(.*?dialect\s+'?([a-z]*)", re.IGNORECASE | re.DOTALL
813+
rf"{HEADER}.*?{KEY_BOUNDARY}{WS_OR_COMMENT}*dialect{WS_OR_COMMENT}+{DIALECT_VALUE}{WS_OR_COMMENT}*{VALUE_BOUNDARY}",
814+
re.IGNORECASE | re.DOTALL,
808815
)
809816

810817

@@ -895,7 +902,8 @@ def parse(
895902
A list of the parsed expressions: [Model, *Statements, Query, *Statements]
896903
"""
897904
match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
898-
dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)
905+
dialect_str = match.group("dialect") if match else None
906+
dialect = Dialect.get_or_raise(dialect_str or default_dialect)
899907

900908
tokens = dialect.tokenize(sql)
901909
chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]

tests/core/test_model.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,156 @@ def test_parse(assert_exp_eq):
27272727
)
27282728

27292729

2730+
def test_dialect_pattern():
2731+
def make_test_sql(text: str) -> str:
2732+
return f"""
2733+
MODEL (
2734+
name test_model,
2735+
kind INCREMENTAL_BY_TIME_RANGE(
2736+
time_column ds
2737+
),
2738+
{text}
2739+
);
2740+
2741+
SELECT 1;
2742+
"""
2743+
2744+
def assert_match(test_sql: str, expected_value: t.Optional[str] = "duckdb"):
2745+
match = d.DIALECT_PATTERN.search(test_sql)
2746+
2747+
dialect_str: t.Optional[str] = None
2748+
if expected_value is not None:
2749+
assert match
2750+
dialect_str = match.group("dialect")
2751+
2752+
assert dialect_str == expected_value
2753+
2754+
# single-quoted dialect
2755+
assert_match(
2756+
make_test_sql(
2757+
"""
2758+
dialect 'duckdb',
2759+
description 'there's a dialect foo in here too!'
2760+
"""
2761+
)
2762+
)
2763+
2764+
# bare dialect
2765+
assert_match(
2766+
make_test_sql(
2767+
"""
2768+
dialect duckdb,
2769+
description 'there's a dialect foo in here too!'
2770+
"""
2771+
)
2772+
)
2773+
2774+
# double-quoted dialect (allowed in BigQuery)
2775+
assert_match(
2776+
make_test_sql(
2777+
"""
2778+
dialect "duckdb",
2779+
description 'there's a dialect foo in here too!'
2780+
"""
2781+
)
2782+
)
2783+
2784+
# no dialect specified, "dialect" in description
2785+
test_sql = make_test_sql(
2786+
"""
2787+
description 'there's a dialect foo in here too!'
2788+
"""
2789+
)
2790+
2791+
matches = list(d.DIALECT_PATTERN.finditer(test_sql))
2792+
assert not matches
2793+
2794+
# line comment between properties
2795+
assert_match(
2796+
make_test_sql(
2797+
"""
2798+
tag my_tag, -- comment
2799+
dialect duckdb
2800+
"""
2801+
)
2802+
)
2803+
2804+
# block comment between properties
2805+
assert_match(
2806+
make_test_sql(
2807+
"""
2808+
tag my_tag, /* comment */
2809+
dialect duckdb
2810+
"""
2811+
)
2812+
)
2813+
2814+
# quoted empty dialect
2815+
assert_match(
2816+
make_test_sql(
2817+
"""
2818+
dialect '',
2819+
tag my_tag
2820+
"""
2821+
),
2822+
None,
2823+
)
2824+
2825+
# double-quoted empty dialect
2826+
assert_match(
2827+
make_test_sql(
2828+
"""
2829+
dialect "",
2830+
tag my_tag
2831+
"""
2832+
),
2833+
None,
2834+
)
2835+
2836+
# trailing comment after dialect value
2837+
assert_match(
2838+
make_test_sql(
2839+
"""
2840+
dialect duckdb -- trailing comment
2841+
"""
2842+
)
2843+
)
2844+
2845+
# dialect value isn't terminated by ',' or ')'
2846+
test_sql = make_test_sql(
2847+
"""
2848+
dialect duckdb -- trailing comment
2849+
tag my_tag
2850+
"""
2851+
)
2852+
2853+
matches = list(d.DIALECT_PATTERN.finditer(test_sql))
2854+
assert not matches
2855+
2856+
# dialect first
2857+
assert_match(
2858+
"""
2859+
MODEL(
2860+
dialect duckdb,
2861+
name my_name
2862+
);
2863+
"""
2864+
)
2865+
2866+
# full parse
2867+
sql = """
2868+
MODEL (
2869+
name test_model,
2870+
description 'this text mentions dialect foo but is not a property'
2871+
);
2872+
2873+
SELECT 1;
2874+
"""
2875+
expressions = d.parse(sql, default_dialect="duckdb")
2876+
model = load_sql_based_model(expressions)
2877+
assert model.dialect == ""
2878+
2879+
27302880
CONST = "bar"
27312881

27322882

0 commit comments

Comments
 (0)