diff --git a/crates/squawk_parser/src/grammar.rs b/crates/squawk_parser/src/grammar.rs index d02af889..ff6d449a 100644 --- a/crates/squawk_parser/src/grammar.rs +++ b/crates/squawk_parser/src/grammar.rs @@ -2702,8 +2702,7 @@ fn data_source(p: &mut Parser<'_>) { p.eat(LATERAL_KW); match p.current() { L_PAREN => { - // TODO: this should be `paren_select` instead of a general `tuple_expr`, since only a select statement is allowed inside - tuple_expr(p); + paren_data_source(p); opt_alias(p); } JSON_TABLE_KW => { @@ -2731,6 +2730,33 @@ fn data_source(p: &mut Parser<'_>) { } } +fn paren_data_source(p: &mut Parser<'_>) -> CompletedMarker { + assert!(p.at(L_PAREN)); + let m = p.start(); + p.bump(L_PAREN); + + // Try to parse as a SELECT statement first + if p.at_ts(SELECT_FIRST) { + if select(p, None).is_some() { + p.expect(R_PAREN); + return m.complete(p, PAREN_EXPR); + } + } + + // Then try to parse as a FROM_ITEM (which includes table references and joins) + if opt_from_item(p) { + p.expect(R_PAREN); + return m.complete(p, PAREN_EXPR); + } + + // Fall back to general expression parsing + if expr(p).is_none() { + p.error("expected an expression"); + } + p.expect(R_PAREN); + m.complete(p, PAREN_EXPR) +} + // USING data_source ON join_condition fn merge_using_clause(p: &mut Parser<'_>) { let m = p.start(); @@ -2834,7 +2860,7 @@ fn join(p: &mut Parser<'_>) { // USING ( join_column [, ...] ) p.expect(USING_KW); if p.at(L_PAREN) { - tuple_expr(p); + column_list(p); } else { p.error("expected L_PAREN"); } diff --git a/crates/squawk_parser/tests/data/ok/select.sql b/crates/squawk_parser/tests/data/ok/select.sql index 185bd980..f77c9021 100644 --- a/crates/squawk_parser/tests/data/ok/select.sql +++ b/crates/squawk_parser/tests/data/ok/select.sql @@ -356,6 +356,11 @@ select * from t full join t2 using (id); -- multi conditions select * from t join t2 on t2.team_id = t.team_id and t2.id = t.org_id; +-- nested joins +select f1, count(*) from +t1 x(x0,x1) left join (t1 left join t2 using(f1)) on (x0 = 0) +group by f1; + -- using w/ join alias SELECT * from t join t2 using (id) as foo; @@ -487,10 +492,10 @@ select current_schema; select * from t order by a using >>>; -- order_by_regression -SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value -FROM sensors_uncompressed +SELECT sensor_id, DATE_TRUNC('day', ts) AS day, MAX(value) AS max_value, MIN(value) AS min_value +FROM sensors_uncompressed WHERE ts >= DATE '2023-12-21' AND ts < DATE '2023-12-22' -GROUP BY sensor_id, DATE_TRUNC('day', ts) +GROUP BY sensor_id, DATE_TRUNC('day', ts) ORDER BY sensor_id, day; -- select_from_user_table diff --git a/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap b/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap index 38fbc770..71a77a3e 100644 --- a/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__misc_ok.snap @@ -3020,10 +3020,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "jobid" + COLUMN + NAME_REF + IDENT "jobid" R_PAREN ")" WHITESPACE "\n" WHERE_CLAUSE @@ -6300,10 +6301,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "USING" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "turbine_id" + COLUMN + NAME_REF + IDENT "turbine_id" R_PAREN ")" WHITESPACE "\n" WHERE_CLAUSE diff --git a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap index 7528a718..7be1834e 100644 --- a/crates/squawk_parser/tests/snapshots/tests__select_ok.snap +++ b/crates/squawk_parser/tests/snapshots/tests__select_ok.snap @@ -4366,10 +4366,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n" @@ -4398,14 +4399,16 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - TUPLE_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" COMMA "," WHITESPACE " " - NAME_REF - IDENT "foo" + COLUMN + NAME_REF + IDENT "foo" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4436,10 +4439,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4470,10 +4474,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4539,6 +4544,98 @@ SOURCE_FILE IDENT "org_id" SEMICOLON ";" WHITESPACE "\n\n" + COMMENT "-- nested joins" + WHITESPACE "\n" + SELECT + SELECT_CLAUSE + SELECT_KW "select" + WHITESPACE " " + TARGET_LIST + TARGET + NAME_REF + IDENT "f1" + COMMA "," + WHITESPACE " " + TARGET + CALL_EXPR + NAME_REF + IDENT "count" + ARG_LIST + L_PAREN "(" + STAR "*" + R_PAREN ")" + WHITESPACE " " + FROM_CLAUSE + FROM_KW "from" + WHITESPACE "\n" + NAME_REF + IDENT "t1" + WHITESPACE " " + ALIAS + NAME + IDENT "x" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME + IDENT "x0" + COMMA "," + COLUMN + NAME + IDENT "x1" + R_PAREN ")" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + NAME_REF + IDENT "t1" + WHITESPACE " " + JOIN + LEFT_KW "left" + WHITESPACE " " + JOIN_KW "join" + WHITESPACE " " + NAME_REF + IDENT "t2" + WHITESPACE " " + USING_CLAUSE + USING_KW "using" + COLUMN_LIST + L_PAREN "(" + COLUMN + NAME_REF + IDENT "f1" + R_PAREN ")" + R_PAREN ")" + WHITESPACE " " + ON_KW "on" + WHITESPACE " " + PAREN_EXPR + L_PAREN "(" + BIN_EXPR + NAME_REF + IDENT "x0" + WHITESPACE " " + EQ "=" + WHITESPACE " " + LITERAL + INT_NUMBER "0" + R_PAREN ")" + WHITESPACE "\n" + GROUP_BY_CLAUSE + GROUP_KW "group" + WHITESPACE " " + BY_KW "by" + WHITESPACE " " + NAME_REF + IDENT "f1" + SEMICOLON ";" + WHITESPACE "\n\n" COMMENT "-- using w/ join alias" WHITESPACE "\n" SELECT @@ -4564,10 +4661,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" WHITESPACE " " ALIAS @@ -4707,10 +4805,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "id" + COLUMN + NAME_REF + IDENT "id" R_PAREN ")" WHITESPACE "\n" JOIN @@ -4724,10 +4823,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - EVENT_KW "event" + COLUMN + NAME_REF + EVENT_KW "event" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4832,10 +4932,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "USING" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "did" + COLUMN + NAME_REF + IDENT "did" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -4862,10 +4963,11 @@ SOURCE_FILE USING_CLAUSE USING_KW "using" WHITESPACE " " - PAREN_EXPR + COLUMN_LIST L_PAREN "(" - NAME_REF - IDENT "a_id" + COLUMN + NAME_REF + IDENT "a_id" R_PAREN ")" SEMICOLON ";" WHITESPACE "\n\n" @@ -5672,13 +5774,13 @@ SOURCE_FILE WHITESPACE " " NAME IDENT "min_value" - WHITESPACE " \n" + WHITESPACE "\n" FROM_CLAUSE FROM_KW "FROM" WHITESPACE " " NAME_REF IDENT "sensors_uncompressed" - WHITESPACE " \n" + WHITESPACE "\n" WHERE_CLAUSE WHERE_KW "WHERE" WHITESPACE " " @@ -5732,7 +5834,7 @@ SOURCE_FILE NAME_REF IDENT "ts" R_PAREN ")" - WHITESPACE " \n" + WHITESPACE "\n" ORDER_BY_CLAUSE ORDER_KW "ORDER" WHITESPACE " " diff --git a/crates/squawk_parser/tests/tests.rs b/crates/squawk_parser/tests/tests.rs index 91857103..5c14f671 100644 --- a/crates/squawk_parser/tests/tests.rs +++ b/crates/squawk_parser/tests/tests.rs @@ -75,6 +75,7 @@ fn parser_err(fixture: Fixture<&str>) { ); } +// 102 failing #[dir_test( dir: "$CARGO_MANIFEST_DIR/tests/data/regression_suite", glob: "*.sql",