From 7edf637370acda6573e6c2254c1e2de7774e6ae1 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Sat, 24 May 2025 18:30:38 +0200 Subject: [PATCH 1/4] fix: paren data source --- crates/squawk_parser/src/grammar.rs | 32 +++- crates/squawk_parser/tests/data/ok/select.sql | 11 +- .../tests/snapshots/tests__join_using_ok.snap | 96 ++++++++++ .../tests/snapshots/tests__misc_ok.snap | 14 +- .../tests/snapshots/tests__select_ok.snap | 166 ++++++++++++++---- crates/squawk_parser/tests/tests.rs | 1 + 6 files changed, 277 insertions(+), 43 deletions(-) create mode 100644 crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 65377c4a..9d46dd86 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -2710,7 +2710,8 @@ fn data_source(p: &mut Parser<'_>) { match p.current() { L_PAREN => { // TODO: this should be `paren_select` instead of a general `tuple_expr`, since only a select statement is allowed inside - tuple_expr(p); + // THIS CAN ALSO BE ANOTHER JOIN EXPR OR A PAREN_SELECT + paren_data_source(p); opt_alias(p); } JSON_TABLE_KW => { @@ -2738,6 +2739,33 @@ fn data_source(p: &mut Parser<'_>) { } } +fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(L_PAREN)); + let m = p.start(); + p.bump(L_PAREN); + + // Try to parse as a SELECT statement first + if p.at_ts(SELECT_FIRST) { + if select_stmt(p, None).is_some() { + p.expect(R_PAREN); + return m.complete(p, PAREN_EXPR); + } + } + + // Then try to parse as a FROM_ITEM (which includes table references and joins) + if opt_from_item(p) { + p.expect(R_PAREN); + return m.complete(p, PAREN_EXPR); + } + + // Fall back to general expression parsing + if expr(p).is_none() { + p.error("expected an expression"); + } + p.expect(R_PAREN); + m.complete(p, PAREN_EXPR) +} + // USING data_source ON join_condition fn merge_using_clause(p: &mut Parser<'_>) { let m = p.start(); @@ -2841,7 +2869,7 @@ fn join(p: &mut Parser<'_>) { // USING ( join_column [, ...] ) p.expect(USING_KW); if p.at(L_PAREN) { - tuple_expr(p); + column_list(p); } else { p.error("expected L_PAREN"); } diff --git a/crates/squawk_parser/tests/data/ok/select.sql b/crates/squawk_parser/tests/data/ok/select.sql index 185bd980..f77c9021 100644 --- a/crates/squawk_parser/tests/data/ok/select.sql +++ b/crates/squawk_parser/tests/data/ok/select.sql @@ -356,6 +356,11 @@ select * from t full join t2 using (id); -- multi conditions select * from t join t2 on t2.team_id = t.team_id and t2.id = t.org_id; +-- nested joins +select f1, count(*) from +t1 x(x0,x1) left join (t1 left join t2 using(f1)) on (x0 = 0) +group by f1; + -- using w/ join alias SELECT * from t join t2 using (id) as foo; @@ -487,10 +492,10 @@ select current_schema; select * from t order by a using >>>; -- order_by_regression -SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value -FROM sensors_uncompressed +SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value +FROM sensors_uncompressed WHERE ts >= DATE '2023-12-21' AND ts < DATE '2023-12-22' -GROUP BY sensor_id, DATE_TRUNC('day', ts) +GROUP BY sensor_id, DATE_TRUNC('day', ts) ORDER BY sensor_id, day; -- select_from_user_table diff --git a/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap b/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap new file mode 100644 index 00000000..a1d856e3 --- /dev/null +++ b/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap @@ -0,0 +1,96 @@ +--- +source: crates/squawk_parser/tests/tests.rs +input_file: crates/squawk_parser/tests/data/ok/join_using.sql +snapshot_kind: text +--- +SOURCE_FILE + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "f1" + COMMA "," + WHITESPACE " " + TARGET + CALL_EXPR + NAME_REF + IDENT "count" + ARG_LIST + L_PAREN "(" + STAR "*" + R_PAREN ")" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE "\n" + NAME_REF + IDENT "t1" + WHITESPACE " " + ALIAS + NAME + IDENT "x" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME + IDENT "x0" + COMMA "," + COLUMN + NAME + IDENT "x1" + R_PAREN ")" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + NAME_REF + IDENT "t1" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + NAME_REF + IDENT "t2" + WHITESPACE " " + USING_CLAUSE + USING_KW "using" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME_REF + IDENT "f1" + R_PAREN ")" + R_PAREN ")" + WHITESPACE " " + ON_KW "on" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + BIN_EXPR + NAME_REF + IDENT "x0" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + INT_NUMBER "0" + R_PAREN ")" + WHITESPACE "\n" + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + NAME_REF + IDENT "f1" + SEMICOLON ";" + WHITESPACE "\n" diff --git a/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap b/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap index d57e4060..aa273a0b 100644 --- a/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap @@ -3011,10 +3011,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "jobid" + COLUMN + NAME_REF + IDENT "jobid" R_PAREN ")" WHITESPACE "\n" WHERE_CLAUSE @@ -6286,10 +6287,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "USING" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "turbine_id" + COLUMN + NAME_REF + IDENT "turbine_id" R_PAREN ")" WHITESPACE "\n" WHERE_CLAUSE diff --git a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap index c4040d4f..5149c218 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap @@ -4367,10 +4367,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n" @@ -4399,14 +4400,16 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - TUPLE_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" COMMA "," WHITESPACE " " - NAME_REF - IDENT "foo" + COLUMN + NAME_REF + IDENT "foo" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4437,10 +4440,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4471,10 +4475,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4540,6 +4545,98 @@ SOURCE_FILE IDENT "org_id" SEMICOLON ";" WHITESPACE "\n\n" + COMMENT "-- nested joins" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "f1" + COMMA "," + WHITESPACE " " + TARGET + CALL_EXPR + NAME_REF + IDENT "count" + ARG_LIST + L_PAREN "(" + STAR "*" + R_PAREN ")" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE "\n" + NAME_REF + IDENT "t1" + WHITESPACE " " + ALIAS + NAME + IDENT "x" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME + IDENT "x0" + COMMA "," + COLUMN + NAME + IDENT "x1" + R_PAREN ")" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + NAME_REF + IDENT "t1" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + NAME_REF + IDENT "t2" + WHITESPACE " " + USING_CLAUSE + USING_KW "using" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME_REF + IDENT "f1" + R_PAREN ")" + R_PAREN ")" + WHITESPACE " " + ON_KW "on" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + BIN_EXPR + NAME_REF + IDENT "x0" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + INT_NUMBER "0" + R_PAREN ")" + WHITESPACE "\n" + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + NAME_REF + IDENT "f1" + SEMICOLON ";" + WHITESPACE "\n\n" COMMENT "-- using w/ join alias" WHITESPACE "\n" SELECT @@ -4565,10 +4662,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" WHITESPACE " " ALIAS @@ -4708,10 +4806,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" WHITESPACE "\n" JOIN @@ -4725,10 +4824,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - EVENT_KW "event" + COLUMN + NAME_REF + EVENT_KW "event" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4833,10 +4933,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "USING" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "did" + COLUMN + NAME_REF + IDENT "did" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4863,10 +4964,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "a_id" + COLUMN + NAME_REF + IDENT "a_id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -5669,13 +5771,13 @@ SOURCE_FILE WHITESPACE " " NAME IDENT "min_value" - WHITESPACE " \n" + WHITESPACE "\n" FROM_CLAUSE FROM_KW "FROM" WHITESPACE " " NAME_REF IDENT "sensors_uncompressed" - WHITESPACE " \n" + WHITESPACE "\n" WHERE_CLAUSE WHERE_KW "WHERE" WHITESPACE " " @@ -5729,7 +5831,7 @@ SOURCE_FILE NAME_REF IDENT "ts" R_PAREN ")" - WHITESPACE " \n" + WHITESPACE "\n" ORDER_BY_CLAUSE ORDER_KW "ORDER" WHITESPACE " " diff --git a/crates/squawk_parser/tests/tests.rs b/crates/squawk_parser/tests/tests.rs index 49c21b26..cad68381 100644 --- a/crates/squawk_parser/tests/tests.rs +++ b/crates/squawk_parser/tests/tests.rs @@ -75,6 +75,7 @@ fn parser_err(fixture: Fixture<&str>) { ); } +// 102 failing #[dir_test( dir: "$CARGO_MANIFEST_DIR/tests/data/regression_suite", glob: "*.sql", From 54ee2daf86cf4dfa5cc970aee63610eac14a1606 Mon Sep 17 00:00:00 2001 From: psteinroe Date: Sat, 24 May 2025 18:32:19 +0200 Subject: [PATCH 2/4] cleanup --- .../tests/snapshots/tests__join_using_ok.snap | 96 ------------------- 1 file changed, 96 deletions(-) delete mode 100644 crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap diff --git a/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap b/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap deleted file mode 100644 index a1d856e3..00000000 --- a/crates/squawk_parser/tests/snapshots/tests__join_using_ok.snap +++ /dev/null @@ -1,96 +0,0 @@ ---- -source: crates/squawk_parser/tests/tests.rs -input_file: crates/squawk_parser/tests/data/ok/join_using.sql -snapshot_kind: text ---- -SOURCE_FILE - SELECT - SELECT_CLAUSE - SELECT_KW "select" - WHITESPACE " " - TARGET_LIST - TARGET - NAME_REF - IDENT "f1" - COMMA "," - WHITESPACE " " - TARGET - CALL_EXPR - NAME_REF - IDENT "count" - ARG_LIST - L_PAREN "(" - STAR "*" - R_PAREN ")" - WHITESPACE " " - FROM_CLAUSE - FROM_KW "from" - WHITESPACE "\n" - NAME_REF - IDENT "t1" - WHITESPACE " " - ALIAS - NAME - IDENT "x" - COLUMN_LIST - L_PAREN "(" - COLUMN - NAME - IDENT "x0" - COMMA "," - COLUMN - NAME - IDENT "x1" - R_PAREN ")" - WHITESPACE " " - JOIN - LEFT_KW "left" - WHITESPACE " " - JOIN_KW "join" - WHITESPACE " " - PAREN_EXPR - L_PAREN "(" - NAME_REF - IDENT "t1" - WHITESPACE " " - JOIN - LEFT_KW "left" - WHITESPACE " " - JOIN_KW "join" - WHITESPACE " " - NAME_REF - IDENT "t2" - WHITESPACE " " - USING_CLAUSE - USING_KW "using" - COLUMN_LIST - L_PAREN "(" - COLUMN - NAME_REF - IDENT "f1" - R_PAREN ")" - R_PAREN ")" - WHITESPACE " " - ON_KW "on" - WHITESPACE " " - PAREN_EXPR - L_PAREN "(" - BIN_EXPR - NAME_REF - IDENT "x0" - WHITESPACE " " - EQ "=" - WHITESPACE " " - LITERAL - INT_NUMBER "0" - R_PAREN ")" - WHITESPACE "\n" - GROUP_BY_CLAUSE - GROUP_KW "group" - WHITESPACE " " - BY_KW "by" - WHITESPACE " " - NAME_REF - IDENT "f1" - SEMICOLON ";" - WHITESPACE "\n" From a0f48b6f8d3d2aaf4847f4b241f63835a228816a Mon Sep 17 00:00:00 2001 From: psteinroe Date: Sat, 24 May 2025 18:35:39 +0200 Subject: [PATCH 3/4] cleanup --- crates/squawk_parser/src/grammar.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index 9d46dd86..90b16ba4 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -2709,8 +2709,6 @@ fn data_source(p: &mut Parser<'_>) { p.eat(LATERAL_KW); match p.current() { L_PAREN => { - // TODO: this should be `paren_select` instead of a general `tuple_expr`, since only a select statement is allowed inside - // THIS CAN ALSO BE ANOTHER JOIN EXPR OR A PAREN_SELECT paren_data_source(p); opt_alias(p); } From a0b1d25e974a9fe8dc4360408e8b99181361f24e Mon Sep 17 00:00:00 2001 From: psteinroe Date: Sat, 24 May 2025 18:41:57 +0200 Subject: [PATCH 4/4] fix after merge --- crates/squawk_parser/src/grammar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index cba9e67d..ff6d449a 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -2737,7 +2737,7 @@ fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker { // Try to parse as a SELECT statement first if p.at_ts(SELECT_FIRST) { - if select_stmt(p, None).is_some() { + if select(p, None).is_some() { p.expect(R_PAREN); return m.complete(p, PAREN_EXPR); }