From 96dc914a2aea1c6aa5d8c0dc59028b1c7f2bebd5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 23 Jan 2026 02:41:24 -0800 Subject: [PATCH 01/12] Added redundant HAS removal optimization and updated tests, WIP handling the MENU tests due to dataset/metadata issues --- pydough/conversion/relational_converter.py | 8 +++++- .../database_connectors/database_connector.py | 1 + pydough/unqualified/qualification.py | 3 +- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 24 ++++++++++++++++ tests/test_pipeline_tpch_custom.py | 13 +++++++++ tests/test_plan_refsols/common_prefix_ab.txt | 4 +-- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/menu_5556.txt | 12 ++++++++ tests/test_plan_refsols/redundant_has.txt | 8 ++++++ .../supplier_pct_national_qty.txt | 2 +- .../defog_broker_adv8_mysql.sql | 17 +++-------- .../defog_broker_adv8_postgres.sql | 17 +++-------- .../defog_broker_adv8_snowflake.sql | 17 +++-------- .../defog_broker_adv8_sqlite.sql | 17 +++-------- tests/test_sql_refsols/menu_5556_ansi.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_mysql.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_postgres.sql | 28 +++++++++++++++++++ .../test_sql_refsols/menu_5556_snowflake.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_sqlite.sql | 28 +++++++++++++++++++ 20 files changed, 228 insertions(+), 59 deletions(-) create mode 100644 tests/test_plan_refsols/menu_5556.txt create mode 100644 tests/test_plan_refsols/redundant_has.txt create mode 100644 tests/test_sql_refsols/menu_5556_ansi.sql create mode 100644 tests/test_sql_refsols/menu_5556_mysql.sql create mode 100644 tests/test_sql_refsols/menu_5556_postgres.sql create mode 100644 tests/test_sql_refsols/menu_5556_snowflake.sql create mode 100644 tests/test_sql_refsols/menu_5556_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c67d4117d..c86d0bec1 100644 --- 
a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -731,10 +731,16 @@ def handle_children( child_output = self.apply_aggregations( child, child_output, child.subtree.agg_keys ) + join_type: JoinType = child.connection_type.join_type + # Semi-joins on singular subtrees can be promoted to + # inner joins to avoid unnecessary complexity and + # improve performance. + if join_type == JoinType.SEMI and child.subtree.is_singular(): + join_type = JoinType.INNER context = self.join_outputs( context, child_output, - child.connection_type.join_type, + join_type, cardinality, child.reverse_cardinality, join_keys, diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index b34189442..303c7581c 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -52,6 +52,7 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: try: self.cursor.execute(sql) except Exception as e: + breakpoint() print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") raise pydough.active_session.error_builder.sql_runtime_failure( sql, e, True diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index c1566b8a6..e6e643cb5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -595,7 +595,8 @@ def qualify_access( ) if ( - isinstance(qualified_parent, GlobalContext) + isinstance(unqualified_parent, UnqualifiedRoot) + and isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name and not is_child ) or ( diff --git a/tests/conftest.py b/tests/conftest.py index 21d433ccd..2df9a6916 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -690,7 +690,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu"] """ 
Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index d8b24a183..3ddba4fbf 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -113,6 +113,30 @@ ), id="wdi_albania_footnotes_1978", ), + pytest.param( + PyDoughPandasTest( + """ +result = menu.menu.WHERE( + HAS(menupages.menuitems.dish.WHERE(LOWER(name) == "baked apples with cream")) +).CALCULATE( + sponsor_name=sponsor, + max_item_price=MAX(menupages.menuitems.price) +).TOP_K( + 1, by=max_item_price.DESC() +).CALCULATE( + sponsor=sponsor_name +) + """, + "menu", + lambda: pd.DataFrame( + { + "sponsor": ["foo"], + } + ), + "menu_5556", + ), + id="menu_5556", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index ddb9a7433..3379d4c53 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2681,6 +2681,19 @@ ), id="quarter_function_test", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [30183], + } + ), + "redundant_has", + ), + id="redundant_has", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 08996397b..93550a02f 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 05678bd68..946eb0b41 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/menu_5556.txt b/tests/test_plan_refsols/menu_5556.txt new file mode 100644 index 
000000000..1b56e06ba --- /dev/null +++ b/tests/test_plan_refsols/menu_5556.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('sponsor', sponsor)], orderings=[(max_price):desc_last], limit=1:numeric) + JOIN(condition=t0.id == t1.menu_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_price': t1.max_price, 'sponsor': t0.sponsor}) + SCAN(table=main.Menu, columns={'id': id, 'sponsor': sponsor}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_price': max_price, 'menu_id': menu_id}) + AGGREGATE(keys={'menu_id': menu_id}, aggregations={'max_price': MAX(price), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.dish_id == t1.id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'menu_id': t0.menu_id, 'n_rows': t1.n_rows, 'price': t0.price}) + JOIN(condition=t0.id == t1.menu_page_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'dish_id': t1.dish_id, 'menu_id': t0.menu_id, 'price': t1.price}) + SCAN(table=main.MenuPage, columns={'id': id, 'menu_id': menu_id}) + SCAN(table=main.MenuItem, columns={'dish_id': dish_id, 'menu_page_id': menu_page_id, 'price': price}) + PROJECT(columns={'id': id, 'n_rows': 1:numeric}) + FILTER(condition=LOWER(name) == 'baked apples with cream':string, columns={'id': id}) + SCAN(table=main.Dish, columns={'id': id, 'name': name}) diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt new file mode 100644 index 000000000..ab1dc1a3f --- /dev/null +++ b/tests/test_plan_refsols/redundant_has.txt @@ -0,0 +1,8 @@ +gROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index d3a24fe65..86074e8ec 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -2,7 +2,7 @@ ROOT(columns=[('supplier_name', anything_s_name), ('nation_name', anything_n_nam AGGREGATE(keys={'l_suppkey': s_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 
'r_regionkey': r_regionkey}) diff --git a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql index 17fb88b4a..743e4da74 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbCustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbTransaction.sbtxamount), 0) AS total_amount FROM main.sbTransaction AS sbTransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbTransaction.sbtxcustid +JOIN main.sbCustomer AS sbCustomer + ON LOWER(sbCustomer.sbcustcountry) = 'usa' + AND sbCustomer.sbcustid = sbTransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbTransaction.sbtxdatetime < CAST(DATE_SUB( + sbTransaction.sbtxdatetime < CAST(DATE_SUB( CURRENT_TIMESTAMP(), INTERVAL ( ( diff --git a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql index 26080ce78..f8c0db2f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', CURRENT_TIMESTAMP - CAST(( EXTRACT(DOW FROM CURRENT_TIMESTAMP) + 6 diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql 
b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql index 37f29683c..712d5a34e 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', DATEADD( DAY, diff --git a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql index b94aec102..f29c3de35 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE( + sbtransaction.sbtxdatetime < DATE( 'now', '-' || CAST(( CAST(STRFTIME('%w', DATETIME('now')) AS INTEGER) + 6 diff --git a/tests/test_sql_refsols/menu_5556_ansi.sql b/tests/test_sql_refsols/menu_5556_ansi.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_ansi.sql @@ -0,0 
+1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_mysql.sql b/tests/test_sql_refsols/menu_5556_mysql.sql new file mode 100644 index 000000000..0e0ec128a --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_mysql.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.Dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + MenuPage.menu_id, + MAX(MenuItem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.MenuPage AS MenuPage + JOIN main.MenuItem AS MenuItem + ON MenuItem.menu_page_id = MenuPage.id + LEFT JOIN _s3 AS _s3 + ON MenuItem.dish_id = _s3.id + GROUP BY + 1 +) +SELECT + Menu.sponsor +FROM main.Menu AS Menu +JOIN _t1 AS _t1 + ON Menu.id = _t1.menu_id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_postgres.sql b/tests/test_sql_refsols/menu_5556_postgres.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_postgres.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + 
menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_snowflake.sql b/tests/test_sql_refsols/menu_5556_snowflake.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_snowflake.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_sqlite.sql b/tests/test_sql_refsols/menu_5556_sqlite.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_sqlite.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 From 03630d36b76b586787150308d35c3a33f153a9c8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 26 Jan 2026 10:39:58 -0800 Subject: [PATCH 02/12] Working on filter merging program --- pydough/conversion/hybrid_filter_merger.py | 192 
++++++++++++++++++ pydough/conversion/hybrid_translator.py | 26 ++- tests/test_pipeline_s3_datasets.py | 2 +- tests/test_pipeline_tpch_custom.py | 67 ++++++ .../count_multiple_filters_a.txt | 8 + .../count_multiple_filters_b.txt | 24 +++ .../count_multiple_filters_c.txt | 23 +++ 7 files changed, 339 insertions(+), 3 deletions(-) create mode 100644 pydough/conversion/hybrid_filter_merger.py create mode 100644 tests/test_plan_refsols/count_multiple_filters_a.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_b.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_c.txt diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py new file mode 100644 index 000000000..fedf8ccb0 --- /dev/null +++ b/pydough/conversion/hybrid_filter_merger.py @@ -0,0 +1,192 @@ +""" +Logic to merge multiple subtrees in the hybrid tree into one if they are the +same except one of them has more filters than the other and is only used in +a COUNT aggregation, meaning the filter can be implemented by doing a SUM on +the less-filtered subtree where the SUM argument is the additional filters. +""" + +import copy + +import pydough.pydough_operators as pydop +from pydough.qdag import Literal +from pydough.types import BooleanType, NumericType + +from .hybrid_connection import ConnectionType +from .hybrid_expressions import ( + HybridExpr, + HybridFunctionExpr, + HybridLiteralExpr, +) +from .hybrid_operations import ( + HybridCalculate, + HybridFilter, + HybridLimit, +) +from .hybrid_tree import HybridTree + + +class HybridFilterMerger: + """ + TODO + """ + + def merge_filters(self, tree: HybridTree) -> None: + """ + TODO + """ + # Run the main procedure on subtrees with multiple children. + if len(tree.children) > 1: + # Identify which children are only used by a COUNT aggregation that is + # not ONLY_MATCH. 
+ mergeable_children: set[int] = self.identify_mergeable_children(tree) + + child_filters: list[set[HybridExpr]] = [ + self.get_final_filters(child.subtree) for child in tree.children + ] + + child_isomorphisms: list[set[int]] = self.get_child_isomorphisms(tree) + + filter_dag: list[int | None] = self.make_filter_dag( + mergeable_children, child_filters, child_isomorphisms + ) + + print() + print(tree) + print(mergeable_children) + print(child_filters) + print(child_isomorphisms) + print(filter_dag) + + for source_idx, target_idx in enumerate(filter_dag): + if target_idx is None: + continue + print(source_idx, "->", target_idx) + extra_filters: set[HybridExpr] = ( + child_filters[source_idx] - child_filters[target_idx] + ) + print(extra_filters) + assert len(extra_filters) > 0 + new_cond: HybridExpr + if len(extra_filters) == 1: + new_cond = next(iter(extra_filters)) + else: + new_cond = HybridFunctionExpr( + pydop.BAN, + sorted(extra_filters, key=repr), + BooleanType(), + ) + numeric_expr: HybridExpr = HybridFunctionExpr( + pydop.IFF, + [ + new_cond, + HybridLiteralExpr(Literal(1, NumericType())), + HybridLiteralExpr(Literal(0, NumericType())), + ], + NumericType(), + ) + sum_expr: HybridExpr = HybridFunctionExpr( + pydop.SUM, + [numeric_expr], + BooleanType(), + ) + print(sum_expr) + # agg_name: str = tree.gen_agg_name(tree.children[target_idx]) + + # Run the procedure recursively on the parent tree and the child + # subtrees. 
+ if tree.parent is not None: + self.merge_filters(tree.parent) + for child in tree.children: + self.merge_filters(child.subtree) + + def identify_mergeable_children(self, tree: HybridTree) -> set[int]: + """ + TODO + """ + return { + idx + for idx, child in enumerate(tree.children) + if ( + child.connection_type == ConnectionType.AGGREGATION + and {repr(v) for v in child.aggs.values()} == {"COUNT()"} + ) + } + + def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: + """ + TODO + """ + result: set[HybridExpr] = set() + for operation in reversed(tree.pipeline): + if isinstance(operation, HybridFilter): + result.update(operation.condition.get_conjunction()) + if operation.condition.contains_window_functions(): + break + elif isinstance(operation, HybridLimit): + break + elif isinstance(operation, HybridCalculate): + if any( + expr.contains_window_functions() + for expr in operation.new_expressions.values() + ): + break + return result + + def get_child_isomorphisms(self, tree: HybridTree) -> list[set[int]]: + """ + TODO + """ + filter_stripped_forms: list[str] = [ + self.get_filter_stripped_form(child.subtree) for child in tree.children + ] + result: list[set[int]] = [] + for i, form in enumerate(filter_stripped_forms): + alternatives: set[int] = set() + for j, other_form in enumerate(filter_stripped_forms): + if i != j and form == other_form: + alternatives.add(j) + result.append(alternatives) + return result + + def get_filter_stripped_form(self, tree: HybridTree) -> str: + """ + TODO + """ + stripped_tree = copy.deepcopy(tree) + for idx, operation in reversed(list(enumerate(stripped_tree.pipeline))): + if isinstance(operation, HybridFilter): + stripped_tree.pipeline.pop(idx) + if operation.condition.contains_window_functions(): + break + elif isinstance(operation, HybridLimit): + break + elif isinstance(operation, HybridCalculate): + if any( + expr.contains_window_functions() + for expr in operation.new_expressions.values() + ): + break + return 
repr(stripped_tree) + + def make_filter_dag( + self, + mergeable_children: set[int], + child_filters: list[set[HybridExpr]], + child_isomorphisms: list[set[int]], + ) -> list[int | None]: + """ + TODO + """ + dag: list[int | None] = [None for _ in range(len(child_filters))] + for idx in mergeable_children: + for other_idx in child_isomorphisms[idx]: + if child_filters[other_idx] < child_filters[idx]: + dag[idx] = other_idx + break + for idx in range(len(dag)): + if dag[idx] is not None: + while True: + target_idx: int | None = dag[idx] + if target_idx is not None: + dag[idx] = dag[target_idx] + return dag diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index c8e1e1617..c9cc27488 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -64,6 +64,7 @@ HybridSidedRefExpr, HybridWindowExpr, ) +from .hybrid_filter_merger import HybridFilterMerger from .hybrid_operations import ( HybridCalculate, HybridCollectionAccess, @@ -1680,6 +1681,19 @@ def run_hybrid_decorrelation(self, hybrid: "HybridTree") -> None: decorr.find_correlated_children(hybrid) decorr.decorrelate_hybrid_tree(hybrid) + def run_filter_merging(self, hybrid: "HybridTree") -> None: + """ + Invokes the procedure to merge identical child subtrees in the hybrid + tree if they are identical except for the filters they have, which can + be emulated via a SUM on a predicate. The transformation is done + in-place. + + Args: + `hybrid`: The hybrid tree to run filter merging on. + """ + filter_merger: HybridFilterMerger = HybridFilterMerger() + filter_merger.merge_filters(hybrid) + def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: """ Convert a PyDough QDAG node to a hybrid tree, including any necessary @@ -1704,10 +1718,18 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. 
self.run_hybrid_decorrelation(hybrid) - # 6. Run any final rewrites, such as turning MEDIAN into an average + print() + print("BEFORE FILTER MERGING") + print(hybrid) + # 5. Run the filter-merging procedure. + self.run_filter_merging(hybrid) + print() + print("AFTER FILTER MERGING") + print(hybrid) + # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. self.run_rewrites(hybrid) - # 7. Remove any dead children in the hybrid tree that are no longer + # 8. Remove any dead children in the hybrid tree that are no longer # being used. hybrid.remove_dead_children(set()) return hybrid diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 3ddba4fbf..0e73a5ec8 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -130,7 +130,7 @@ "menu", lambda: pd.DataFrame( { - "sponsor": ["foo"], + "sponsor": ["MURRAY HILL HOTEL"], } ), "menu_5556", diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 3379d4c53..82827565f 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2694,6 +2694,73 @@ ), id="redundant_has", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n2=COUNT(customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [268], + } + ), + "count_multiple_filters_a", + ), + id="count_multiple_filters_a", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n2=COUNT(customers.WHERE(market_segment == 'BUILDING')), " + " n3=COUNT(customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + " n4=COUNT(customers.WHERE(MONOTONIC(500, 
account_balance, 600) & STARTSWITH(phone, '11'))), " + " n5=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + " n6=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600) & STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [30142], + "n3": [268], + "n4": [54], + "n5": [1261], + "n6": [19], + } + ), + "count_multiple_filters_b", + ), + id="count_multiple_filters_b", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers), " + " n2=COUNT(customers.WHERE(market_segment == 'BUILDING')), " + " n3=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n4=COUNT(customers.WHERE(STARTSWITH(phone, '11'))), " + " n5=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + " n6=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [150000], + "n2": [30142], + "n3": [1379], + "n4": [5975], + "n5": [1261], + "n6": [19], + } + ), + "count_multiple_filters_c", + ), + id="count_multiple_filters_c", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt new file mode 100644 index 000000000..88351383e --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': 
COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt new file mode 100644 index 000000000..47f23d552 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -0,0 +1,24 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': 
COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt new file mode 100644 index 000000000..9d21f4e0c --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -0,0 +1,23 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': 
t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) From 97600a1571ce9ec76c029304664d2d75f967bee9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 26 Jan 2026 12:07:35 -0800 Subject: [PATCH 03/12] Fixed edge case with children steps, updated test files, need to add comments --- pydough/conversion/hybrid_filter_merger.py | 56 ++++++++++++++----- pydough/conversion/hybrid_translator.py | 8 +-- tests/test_plan_refsols/common_prefix_n.txt | 28 
+++++----- tests/test_plan_refsols/common_prefix_o.txt | 30 +++++----- .../count_multiple_filters_a.txt | 12 ++-- .../count_multiple_filters_b.txt | 30 +++------- .../count_multiple_filters_c.txt | 26 +-------- tests/test_plan_refsols/redundant_has.txt | 2 +- .../defog_restaurants_gen11_ansi.sql | 16 +----- .../defog_restaurants_gen11_mysql.sql | 16 +----- .../defog_restaurants_gen11_postgres.sql | 16 +----- .../defog_restaurants_gen11_snowflake.sql | 16 +----- .../defog_restaurants_gen11_sqlite.sql | 16 +----- 13 files changed, 93 insertions(+), 179 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index fedf8ccb0..5b424258e 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -6,6 +6,7 @@ """ import copy +from typing import TYPE_CHECKING import pydough.pydough_operators as pydop from pydough.qdag import Literal @@ -13,6 +14,7 @@ from .hybrid_connection import ConnectionType from .hybrid_expressions import ( + HybridChildRefExpr, HybridExpr, HybridFunctionExpr, HybridLiteralExpr, @@ -24,12 +26,18 @@ ) from .hybrid_tree import HybridTree +if TYPE_CHECKING: + from .hybrid_translator import HybridTranslator + class HybridFilterMerger: """ TODO """ + def __init__(self, translator: "HybridTranslator") -> None: + self.translator: HybridTranslator = translator + def merge_filters(self, tree: HybridTree) -> None: """ TODO @@ -40,31 +48,27 @@ def merge_filters(self, tree: HybridTree) -> None: # not ONLY_MATCH. 
mergeable_children: set[int] = self.identify_mergeable_children(tree) + # TODO ADD COMMENT child_filters: list[set[HybridExpr]] = [ self.get_final_filters(child.subtree) for child in tree.children ] + # TODO ADD COMMENT child_isomorphisms: list[set[int]] = self.get_child_isomorphisms(tree) + # TODO ADD COMMENT filter_dag: list[int | None] = self.make_filter_dag( mergeable_children, child_filters, child_isomorphisms ) - print() - print(tree) - print(mergeable_children) - print(child_filters) - print(child_isomorphisms) - print(filter_dag) - + # TODO ADD COMMENT + replacement_map: dict[HybridExpr, HybridExpr] = {} for source_idx, target_idx in enumerate(filter_dag): if target_idx is None: continue - print(source_idx, "->", target_idx) extra_filters: set[HybridExpr] = ( child_filters[source_idx] - child_filters[target_idx] ) - print(extra_filters) assert len(extra_filters) > 0 new_cond: HybridExpr if len(extra_filters) == 1: @@ -84,13 +88,34 @@ def merge_filters(self, tree: HybridTree) -> None: ], NumericType(), ) - sum_expr: HybridExpr = HybridFunctionExpr( + sum_expr: HybridFunctionExpr = HybridFunctionExpr( pydop.SUM, [numeric_expr], - BooleanType(), + NumericType(), + ) + agg_name: str = self.translator.gen_agg_name(tree.children[target_idx]) + tree.children[target_idx].aggs[agg_name] = sum_expr + agg_ref: HybridExpr = HybridChildRefExpr( + agg_name, target_idx, NumericType() ) - print(sum_expr) - # agg_name: str = tree.gen_agg_name(tree.children[target_idx]) + old_agg_ref = HybridChildRefExpr( + next(iter(tree.children[source_idx].aggs)), + source_idx, + NumericType(), + ) + replacement_map[old_agg_ref] = agg_ref + tree.children[target_idx].max_steps = min( + tree.children[target_idx].max_steps, + tree.children[source_idx].max_steps, + ) + tree.children[target_idx].min_steps = min( + tree.children[target_idx].min_steps, + tree.children[source_idx].min_steps, + ) + + # TODO ADD COMMENT + for operation in tree.pipeline: + 
operation.replace_expressions(replacement_map) # Run the procedure recursively on the parent tree and the child # subtrees. @@ -187,6 +212,7 @@ def make_filter_dag( if dag[idx] is not None: while True: target_idx: int | None = dag[idx] - if target_idx is not None: - dag[idx] = dag[target_idx] + if target_idx is None or dag[target_idx] is None: + break + dag[idx] = dag[target_idx] return dag diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index c9cc27488..22f4adb29 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1691,7 +1691,7 @@ def run_filter_merging(self, hybrid: "HybridTree") -> None: Args: `hybrid`: The hybrid tree to run filter merging on. """ - filter_merger: HybridFilterMerger = HybridFilterMerger() + filter_merger: HybridFilterMerger = HybridFilterMerger(self) filter_merger.merge_filters(hybrid) def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: @@ -1718,14 +1718,8 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. self.run_hybrid_decorrelation(hybrid) - print() - print("BEFORE FILTER MERGING") - print(hybrid) # 5. Run the filter-merging procedure. self.run_filter_merging(hybrid) - print() - print("AFTER FILTER MERGING") - print(hybrid) # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. 
self.run_rewrites(hybrid) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 0ac7dc610..2cf16811e 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,20 +1,18 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) +ROOT(columns=[('key', l_orderkey), ('order_date', 
anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_agg, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_agg': t1.sum_agg, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': 
o_orderkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'n_rows': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': 
SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index fe0307f7b..f9f6f44f3 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,22 +1,18 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', 
sum_sum_sum_n_rows)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_n_rows': t0.sum_sum_sum_n_rows}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_agg)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t1.sum_sum_agg, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_sum_n_rows != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_n_rows': t0.sum_sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_retailprice': t1.p_retailprice, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t1.n_rows}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'n_rows': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=sum_sum_agg != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + 
AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt index 88351383e..c4182bc18 100644 --- a/tests/test_plan_refsols/count_multiple_filters_a.txt +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -1,8 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) +ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 47f23d552..281cd5a3c 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,24 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': 
t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_6), ('n4', agg_7), ('n5', agg_8), ('n6', agg_9)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'agg_6': t0.agg_6, 'agg_7': t0.agg_7, 'agg_8': t1.agg_8, 'agg_9': t0.agg_9, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'agg_6': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'agg_7': 
SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'agg_9': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'agg_8': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index 9d21f4e0c..78d1de0ad 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,23 +1,3 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) +ROOT(columns=[('n1', n_rows), ('n2', n2), ('n3', n3), ('n4', n4), ('n5', n5), ('n6', n6)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n3': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric), 1:numeric, 0:numeric)), 'n4': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n5': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n6': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt index ab1dc1a3f..dafed22e4 100644 --- a/tests/test_plan_refsols/redundant_has.txt +++ b/tests/test_plan_refsols/redundant_has.txt @@ -1,4 +1,4 @@ -gROOT(columns=[('n', n_rows)], orderings=[]) +ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql index 832005452..af28c71cf 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql index 832005452..af28c71cf 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio +FROM main.restaurant diff --git 
a/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql index bf0e704bf..048ba9823 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - CAST(_s0.n_rows AS DOUBLE PRECISION) / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + CAST(SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql index 832005452..7a6db2f6b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(IFF(rating > 4.5, 1, 0)) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql index 4b5cce5d6..b99291f91 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - CAST(_s0.n_rows AS REAL) / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + CAST(SUM(IIF(rating > 4.5, 1, 0)) AS REAL) / COUNT(*) AS ratio +FROM main.restaurant From 
f2db22244f59e9bbd6434f16de55f8d7cb86fbbf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 27 Jan 2026 10:42:38 -0800 Subject: [PATCH 04/12] Resolving test issues [RUN CI] --- tests/test_masked_sqlite.py | 6 +---- .../count_multiple_filters_a_ansi.sql | 6 +++++ .../count_multiple_filters_a_mysql.sql | 6 +++++ .../count_multiple_filters_a_postgres.sql | 6 +++++ .../count_multiple_filters_a_snowflake.sql | 6 +++++ .../count_multiple_filters_a_sqlite.sql | 6 +++++ .../count_multiple_filters_b_ansi.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_mysql.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_postgres.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_snowflake.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_sqlite.sql | 26 +++++++++++++++++++ .../count_multiple_filters_c_ansi.sql | 17 ++++++++++++ .../count_multiple_filters_c_mysql.sql | 17 ++++++++++++ .../count_multiple_filters_c_postgres.sql | 17 ++++++++++++ .../count_multiple_filters_c_snowflake.sql | 17 ++++++++++++ .../count_multiple_filters_c_sqlite.sql | 17 ++++++++++++ tests/test_sql_refsols/redundant_has_ansi.sql | 7 +++++ .../test_sql_refsols/redundant_has_mysql.sql | 7 +++++ .../redundant_has_postgres.sql | 7 +++++ .../redundant_has_snowflake.sql | 7 +++++ .../test_sql_refsols/redundant_has_sqlite.sql | 7 +++++ 21 files changed, 281 insertions(+), 5 deletions(-) create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_mysql.sql create mode 100644 
tests/test_sql_refsols/count_multiple_filters_b_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_sqlite.sql diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 731aa14c9..536cf78c6 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1553,10 +1553,6 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", @@ -1565,8 +1561,8 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 
'__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", diff --git a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql new file mode 100644 index 000000000..ba0018b62 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql new file mode 100644 index 000000000..a43c29266 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.CUSTOMER +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql new file mode 100644 index 000000000..ba0018b62 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql new file mode 100644 index 000000000..6adc35063 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff 
--git a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql new file mode 100644 index 000000000..703d56924 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql new file mode 100644 index 000000000..92f7c16eb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql new file mode 100644 index 000000000..287a8d2e6 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.CUSTOMER + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + 
SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.CUSTOMER + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql new file mode 100644 index 000000000..92f7c16eb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql new file mode 100644 index 000000000..bc13323f9 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_7, + SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 
'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql new file mode 100644 index 000000000..7cddc1ce8 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_7, + SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql new file mode 100644 index 000000000..3861720ce --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql new file mode 100644 index 
000000000..a3e0f4986 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.CUSTOMER diff --git a/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql new file mode 100644 index 000000000..3861720ce --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql new file mode 100644 index 000000000..609166553 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, + SUM(IFF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS n4, + SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS n5, + SUM( + IFF( + STARTSWITH(c_phone, '11') + 
AND c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING', + 1, + 0 + ) + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql new file mode 100644 index 000000000..8cabf3bfd --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, + SUM(IIF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS n4, + SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS n5, + SUM( + IIF( + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%', + 1, + 0 + ) + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/redundant_has_ansi.sql b/tests/test_sql_refsols/redundant_has_ansi.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_mysql.sql b/tests/test_sql_refsols/redundant_has_mysql.sql new file mode 100644 index 000000000..9dac38536 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +JOIN tpch.NATION AS NATION + ON CUSTOMER.c_nationkey = NATION.n_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_postgres.sql b/tests/test_sql_refsols/redundant_has_postgres.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_postgres.sql @@ -0,0 +1,7 @@ 
+SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_snowflake.sql b/tests/test_sql_refsols/redundant_has_snowflake.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_sqlite.sql b/tests/test_sql_refsols/redundant_has_sqlite.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' From 7c0ab19625f248ac097785fbc9abc75c44030996 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Tue, 27 Jan 2026 12:51:32 -0800 Subject: [PATCH 05/12] switch redundant has and add tests --- pydough/conversion/relational_converter.py | 9 +- tests/test_pipeline_tpch_custom.py | 117 +++++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c67d4117d..a6f042e84 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -731,10 +731,17 @@ def handle_children( child_output = self.apply_aggregations( child, child_output, child.subtree.agg_keys ) + # Optimize SEMI to INNER for singular subtrees + join_type = child.connection_type.join_type + 
if ( + child.connection_type == ConnectionType.SEMI + and child.subtree.is_singular() + ): + join_type = JoinType.INNER context = self.join_outputs( context, child_output, - child.connection_type.join_type, + join_type, cardinality, child.reverse_cardinality, join_keys, diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 1f1392077..6caac393e 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2739,6 +2739,123 @@ ), id="double_cross", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [30183], + } + ), + "redundant_has", + ), + id="redundant_has", + ), + # Nested HAS on singular chain (supplier -> nation -> region), both should optimize to INNER + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(HAS(region.WHERE(name == 'AFRICA')))))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [1955], + } + ), + "redundant_has_nested", + ), + id="redundant_has_nested", + ), + # HAS on plural relationship (orders) - should NOT optimize, stays SEMI + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(orders.WHERE(total_price > 400000)))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [3533], + } + ), + "redundant_has_on_plural", + ), + id="redundant_has_on_plural", + ), + # HAS on singular relationship with additional filter + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(region.name == 'EUROPE')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [1987], + } + ), + "redundant_has_singular_chain", + ), + id="redundant_has_singular_chain", + ), + # HAS on plural relationship (lineitems) - should NOT optimize, stays SEMI + pytest.param( + PyDoughPandasTest( + "result = 
TPCH.CALCULATE(n=COUNT(orders.WHERE(HAS(lines.WHERE(quantity > 49)))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [115066], + } + ), + "redundant_has_on_plural_lineitems", + ), + id="redundant_has_on_plural_lineitems", + ), + # HASNOT on singular relationship - should optimize to ANTI join or similar + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HASNOT(nation.WHERE(region.name == 'AFRICA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [8045], + } + ), + "redundant_has_not_on_singular", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_not_on_singular", + ), + # HAS without WHERE filter on singular - should optimize to INNER + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [150000], + } + ), + "redundant_has_no_filter_singular", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_no_filter_singular", + ), + # HAS on singular within plural context - orders whose customer is from ASIA + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(orders.WHERE(HAS(customer.WHERE(nation.region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [301740], + } + ), + "redundant_has_singular_in_plural_context", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_singular_in_plural_context", + ), pytest.param( PyDoughPandasTest( bad_child_reuse_1, From f074118f11840975ea90bb171a97032d77da51a9 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Tue, 27 Jan 2026 12:52:22 -0800 Subject: [PATCH 06/12] add test files --- tests/test_plan_refsols/redundant_has.txt | 8 ++++++++ tests/test_plan_refsols/redundant_has_nested.txt | 8 ++++++++ .../redundant_has_on_plural.txt | 6 ++++++ .../redundant_has_on_plural_lineitems.txt | 6 ++++++ .../redundant_has_singular_chain.txt | 8 ++++++++ tests/test_sql_refsols/redundant_has_ansi.sql | 7 +++++++ 
tests/test_sql_refsols/redundant_has_mysql.sql | 7 +++++++ .../redundant_has_nested_ansi.sql | 7 +++++++ .../redundant_has_nested_mysql.sql | 7 +++++++ .../redundant_has_nested_postgres.sql | 7 +++++++ .../redundant_has_nested_snowflake.sql | 7 +++++++ .../redundant_has_nested_sqlite.sql | 7 +++++++ .../redundant_has_on_plural_ansi.sql | 5 +++++ .../redundant_has_on_plural_lineitems_ansi.sql | 5 +++++ .../redundant_has_on_plural_lineitems_mysql.sql | 16 ++++++++++++++++ ...edundant_has_on_plural_lineitems_postgres.sql | 16 ++++++++++++++++ ...dundant_has_on_plural_lineitems_snowflake.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_lineitems_sqlite.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_mysql.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_postgres.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_snowflake.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_sqlite.sql | 16 ++++++++++++++++ .../test_sql_refsols/redundant_has_postgres.sql | 7 +++++++ .../redundant_has_singular_chain_ansi.sql | 7 +++++++ .../redundant_has_singular_chain_mysql.sql | 7 +++++++ .../redundant_has_singular_chain_postgres.sql | 7 +++++++ .../redundant_has_singular_chain_snowflake.sql | 7 +++++++ .../redundant_has_singular_chain_sqlite.sql | 7 +++++++ .../test_sql_refsols/redundant_has_snowflake.sql | 7 +++++++ tests/test_sql_refsols/redundant_has_sqlite.sql | 7 +++++++ 30 files changed, 279 insertions(+) create mode 100644 tests/test_plan_refsols/redundant_has.txt create mode 100644 tests/test_plan_refsols/redundant_has_nested.txt create mode 100644 tests/test_plan_refsols/redundant_has_on_plural.txt create mode 100644 tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt create mode 100644 tests/test_plan_refsols/redundant_has_singular_chain.txt create mode 100644 tests/test_sql_refsols/redundant_has_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_ansi.sql 
create mode 100644 tests/test_sql_refsols/redundant_has_nested_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_sqlite.sql diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt new file mode 100644 index 000000000..dafed22e4 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], 
orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/redundant_has_nested.txt b/tests/test_plan_refsols/redundant_has_nested.txt new file mode 100644 index 000000000..19123d2f9 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_nested.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/redundant_has_on_plural.txt b/tests/test_plan_refsols/redundant_has_on_plural.txt new file mode 100644 index 000000000..b19af0652 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_on_plural.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=o_totalprice > 400000:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt b/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt new file mode 100644 index 000000000..8c74d53a4 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, columns={}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + FILTER(condition=l_quantity > 49:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/redundant_has_singular_chain.txt b/tests/test_plan_refsols/redundant_has_singular_chain.txt new file mode 100644 index 000000000..c8679898e --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_singular_chain.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': 
r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_sql_refsols/redundant_has_ansi.sql b/tests/test_sql_refsols/redundant_has_ansi.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_mysql.sql b/tests/test_sql_refsols/redundant_has_mysql.sql new file mode 100644 index 000000000..9dac38536 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +JOIN tpch.NATION AS NATION + ON CUSTOMER.c_nationkey = NATION.n_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_nested_ansi.sql b/tests/test_sql_refsols/redundant_has_nested_ansi.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_mysql.sql b/tests/test_sql_refsols/redundant_has_nested_mysql.sql new file mode 100644 index 000000000..43c6d8d2e --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.SUPPLIER AS SUPPLIER +JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = SUPPLIER.s_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AFRICA' diff --git 
a/tests/test_sql_refsols/redundant_has_nested_postgres.sql b/tests/test_sql_refsols/redundant_has_nested_postgres.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_snowflake.sql b/tests/test_sql_refsols/redundant_has_nested_snowflake.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_sqlite.sql b/tests/test_sql_refsols/redundant_has_nested_sqlite.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql b/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql new file mode 100644 index 000000000..4b4b766b8 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_totalprice > 400000 diff --git 
a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql new file mode 100644 index 000000000..bbc3107da --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey AND lineitem.l_quantity > 49 diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql new file mode 100644 index 000000000..d8cbe6be9 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.LINEITEM + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.ORDERS AS ORDERS +LEFT JOIN _u_0 AS _u_0 + ON ORDERS.o_orderkey = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + 
COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql b/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql new file mode 100644 index 000000000..05d325c6b --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.ORDERS + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +LEFT JOIN _u_0 AS _u_0 + ON CUSTOMER.c_custkey = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql b/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql b/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ 
b/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql b/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_postgres.sql b/tests/test_sql_refsols/redundant_has_postgres.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql b/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql 
b/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql new file mode 100644 index 000000000..53a22063b --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.SUPPLIER AS SUPPLIER +JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = SUPPLIER.s_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql b/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql b/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql b/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND 
region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_snowflake.sql b/tests/test_sql_refsols/redundant_has_snowflake.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_sqlite.sql b/tests/test_sql_refsols/redundant_has_sqlite.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' From 7d72a7d185cb32b5119a49ff924afd2cb170f8ed Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 27 Jan 2026 13:00:55 -0800 Subject: [PATCH 07/12] Added more testing layers and window bugfixes --- pydough/conversion/hybrid_filter_merger.py | 4 +- pydough/conversion/hybrid_translator.py | 4 +- tests/test_pipeline_tpch_custom.py | 63 ++++++++++++ .../count_multiple_filters_d.txt | 36 +++++++ .../count_multiple_filters_e.txt | 10 ++ .../count_multiple_filters_d_ansi.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_mysql.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_postgres.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_snowflake.sql | 95 ++++++++++++++++++ .../count_multiple_filters_d_sqlite.sql | 96 +++++++++++++++++++ .../count_multiple_filters_e_ansi.sql | 36 +++++++ .../count_multiple_filters_e_mysql.sql | 36 +++++++ .../count_multiple_filters_e_postgres.sql | 36 +++++++ .../count_multiple_filters_e_snowflake.sql | 36 +++++++ .../count_multiple_filters_e_sqlite.sql 
| 36 +++++++ 15 files changed, 773 insertions(+), 3 deletions(-) create mode 100644 tests/test_plan_refsols/count_multiple_filters_d.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_e.txt create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 5b424258e..1ed9083f0 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -144,9 +144,9 @@ def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: result: set[HybridExpr] = set() for operation in reversed(tree.pipeline): if isinstance(operation, HybridFilter): - result.update(operation.condition.get_conjunction()) if operation.condition.contains_window_functions(): break + result.update(operation.condition.get_conjunction()) elif isinstance(operation, HybridLimit): break elif isinstance(operation, HybridCalculate): @@ -180,9 +180,9 @@ def get_filter_stripped_form(self, tree: HybridTree) -> str: stripped_tree = copy.deepcopy(tree) for idx, operation in reversed(list(enumerate(stripped_tree.pipeline))): if isinstance(operation, HybridFilter): - stripped_tree.pipeline.pop(idx) if 
operation.condition.contains_window_functions(): break + stripped_tree.pipeline.pop(idx) elif isinstance(operation, HybridLimit): break elif isinstance(operation, HybridCalculate): diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index 2d5f60175..bf3617aad 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1739,8 +1739,10 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. self.run_hybrid_decorrelation(hybrid) - # 5. Run the filter-merging procedure. + # 6. Run the filter-merging procedure, then re-run ejecting aggregate + # inputs to clean up any new aggregates created by filter merging. self.run_filter_merging(hybrid) + self.eject_aggregate_inputs(hybrid) # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation.
self.run_rewrites(hybrid) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index e62bdbd1f..d67037a54 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2770,6 +2770,69 @@ ), id="count_multiple_filters_c", ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c2 = customers.WHERE(nation.name == 'GERMANY').WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c3 = customers.WHERE(nation.name == 'GERMANY')\n" + "c4 = customers.WHERE(nation.name == 'CHINA').WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c5 = customers.WHERE((PERCENTILE(by=account_balance.ASC()) == 100) & (nation.name == 'CHINA'))\n" + "c6 = customers.WHERE(nation.name == 'CHINA')\n" + "c6 = customers.WHERE(nation.name == 'CHINA')\n" + "result = TPCH.CALCULATE(" + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + " n4=COUNT(c4), " + " n5=COUNT(c5), " + " n6=COUNT(c6), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1500], + "n2": [59], + "n3": [5908], + "n4": [60], + "n5": [57], + "n6": [6024], + } + ), + "count_multiple_filters_d", + ), + id="count_multiple_filters_d", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders), " + " n3=COUNT(nations.customers.orders.WHERE(order_priority == '1-URGENT')), " + " n4=COUNT(nations.customers.orders.WHERE(order_priority == '2-HIGH')), " + " n5=COUNT(nations.customers.orders.WHERE(order_priority == '3-MEDIUM')), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [298994, 299103, 301740, 303286, 296877], + "n3": [59767, 59902, 60166, 60373, 60135], + "n4": [59511, 60232, 60246, 60901, 59201], + "n5": [59597, 59230, 60485, 60375, 59036], + } + ), + 
"count_multiple_filters_e", + ), + id="count_multiple_filters_e", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/count_multiple_filters_d.txt b/tests/test_plan_refsols/count_multiple_filters_d.txt new file mode 100644 index 000000000..b80d0ef1c --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_d.txt @@ -0,0 +1,36 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_agg_1': t1.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt new file mode 100644 index 000000000..548c6def4 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_21, 0:numeric)), ('n5', DEFAULT_TO(sum_sum_expr_22, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_21': t1.sum_sum_expr_21, 'sum_sum_expr_22': t1.sum_sum_expr_22}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_21': SUM(sum_expr_21), 'sum_sum_expr_22': SUM(sum_expr_22)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_21': t1.sum_expr_21, 'sum_expr_22': t1.sum_expr_22}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(o_orderpriority == '1-URGENT':string, 1:numeric, 0:numeric)), 'sum_expr_21': SUM(IFF(o_orderpriority == '2-HIGH':string, 1:numeric, 0:numeric)), 'sum_expr_22': SUM(IFF(o_orderpriority == '3-MEDIUM':string, 1:numeric, 0:numeric))}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql new file mode 100644 index 000000000..479f86891 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql @@ -0,0 +1,96 @@ +WITH _t1 AS ( + SELECT + 1 AS "_" + FROM tpch.customer + QUALIFY + NTILE(100) OVER (ORDER BY c_acctbal NULLS LAST) = 100 +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t1 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t4 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s0.c_acctbal NULLS LAST) = 100 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t4 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t10 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s8.c_acctbal NULLS LAST) = 100 +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t10 +), _t14 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = 
nation.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s12.c_acctbal NULLS LAST) = 100 + AND nation.n_name = 'CHINA' +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t14 +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql new file mode 100644 index 000000000..faae8bbfb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN c_acctbal IS NULL THEN 1 ELSE 0 END, c_acctbal) AS _w + FROM tpch.CUSTOMER +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.CUSTOMER +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.NATION + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN _s0.c_acctbal IS NULL THEN 1 ELSE 0 END, _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.CUSTOMER +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.NATION + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN _s8.c_acctbal IS NULL THEN 1 ELSE 0 END, _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS 
n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + NATION.n_name, + NTILE(100) OVER (ORDER BY CASE WHEN _s12.c_acctbal IS NULL THEN 1 ELSE 0 END, _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = _s12.c_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql new file mode 100644 index 000000000..869decb54 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY c_acctbal) AS _w + FROM tpch.customer +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = 
_t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + nation.n_name, + NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql new file mode 100644 index 000000000..4dbe53347 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql @@ -0,0 +1,95 @@ +WITH _t1 AS ( + SELECT + 1 AS "_" + FROM tpch.customer + QUALIFY + NTILE(100) OVER (ORDER BY c_acctbal) = 100 +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t1 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t4 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s0.c_acctbal) = 100 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t4 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t10 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey + QUALIFY + 
NTILE(100) OVER (ORDER BY _s8.c_acctbal) = 100 +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t10 +), _t14 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s12.c_acctbal) = 100 AND nation.n_name = 'CHINA' +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t14 +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql new file mode 100644 index 000000000..869decb54 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY c_acctbal) AS _w + FROM tpch.customer +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = 
_t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + nation.n_name, + NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql new file mode 100644 index 000000000..676f8e1d1 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM 
tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql new file mode 100644 index 000000000..440100388 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.ORDERS + GROUP BY + 1 +), _s5 AS ( + SELECT + NATION.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.NATION AS NATION + JOIN tpch.CUSTOMER AS CUSTOMER + ON CUSTOMER.c_nationkey = NATION.n_nationkey + LEFT JOIN _s3 AS _s3 + ON CUSTOMER.c_custkey = _s3.o_custkey + GROUP BY + 1 +) +SELECT + REGION.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.REGION AS REGION +JOIN _s5 AS _s5 + ON REGION.r_regionkey = _s5.n_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql new file mode 100644 index 000000000..676f8e1d1 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.orders + 
GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql new file mode 100644 index 000000000..1a13d2d5b --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(IFF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, + SUM(IFF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, + SUM(IFF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = 
region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql new file mode 100644 index 000000000..13f47047c --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(IIF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, + SUM(IIF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, + SUM(IIF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey From 6c06d74d6a07f9738345048e0ab407bae8e0788e Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Wed, 28 Jan 2026 10:26:20 -0800 Subject: [PATCH 08/12] [run CI][run SF][run mysql][run postgres] BIRD menu_5556 and update other tests --- pydough/unqualified/qualification.py | 3 +- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 24 ++++++++++++++++ tests/test_plan_refsols/common_prefix_ab.txt | 4 +-- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/menu_5556.txt | 12 ++++++++ .../defog_broker_adv8_mysql.sql | 17 +++-------- .../defog_broker_adv8_postgres.sql | 17 +++-------- .../defog_broker_adv8_snowflake.sql | 17 +++-------- .../defog_broker_adv8_sqlite.sql | 17 
+++-------- tests/test_sql_refsols/menu_5556_ansi.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_mysql.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_postgres.sql | 28 +++++++++++++++++++ .../test_sql_refsols/menu_5556_snowflake.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_sqlite.sql | 28 +++++++++++++++++++ 15 files changed, 198 insertions(+), 57 deletions(-) create mode 100644 tests/test_plan_refsols/menu_5556.txt create mode 100644 tests/test_sql_refsols/menu_5556_ansi.sql create mode 100644 tests/test_sql_refsols/menu_5556_mysql.sql create mode 100644 tests/test_sql_refsols/menu_5556_postgres.sql create mode 100644 tests/test_sql_refsols/menu_5556_snowflake.sql create mode 100644 tests/test_sql_refsols/menu_5556_sqlite.sql diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index c1566b8a6..e6e643cb5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -595,7 +595,8 @@ def qualify_access( ) if ( - isinstance(qualified_parent, GlobalContext) + isinstance(unqualified_parent, UnqualifiedRoot) + and isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name and not is_child ) or ( diff --git a/tests/conftest.py b/tests/conftest.py index 3bf635206..8334590e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -682,7 +682,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu"] """ Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. 
diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 5f88bd717..9570029f2 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -113,6 +113,30 @@ ), id="wdi_albania_footnotes_1978", ), + pytest.param( + PyDoughPandasTest( + """ +result = menu.menu.WHERE( + HAS(menupages.menuitems.dish.WHERE(LOWER(name) == "baked apples with cream")) + ).CALCULATE( + sponsor_name=sponsor, + max_item_price=MAX(menupages.menuitems.price) + ).TOP_K( + 1, by=max_item_price.DESC() + ).CALCULATE( + sponsor=sponsor_name + ) + """, + "menu", + lambda: pd.DataFrame( + { + "sponsor": ["MURRAY HILL HOTEL"], + } + ), + "menu_5556", + ), + id="menu_5556", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 08996397b..93550a02f 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 05678bd68..946eb0b41 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/menu_5556.txt b/tests/test_plan_refsols/menu_5556.txt new file mode 100644 index 000000000..1b56e06ba --- /dev/null +++ b/tests/test_plan_refsols/menu_5556.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('sponsor', sponsor)], orderings=[(max_price):desc_last], limit=1:numeric) + JOIN(condition=t0.id == t1.menu_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_price': t1.max_price, 'sponsor': t0.sponsor}) + SCAN(table=main.Menu, columns={'id': id, 'sponsor': sponsor}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_price': max_price, 'menu_id': menu_id}) + AGGREGATE(keys={'menu_id': menu_id}, aggregations={'max_price': MAX(price), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.dish_id == t1.id, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'menu_id': t0.menu_id, 'n_rows': t1.n_rows, 'price': t0.price}) + JOIN(condition=t0.id == t1.menu_page_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'dish_id': t1.dish_id, 'menu_id': t0.menu_id, 'price': t1.price}) + SCAN(table=main.MenuPage, columns={'id': id, 'menu_id': menu_id}) + SCAN(table=main.MenuItem, columns={'dish_id': dish_id, 'menu_page_id': menu_page_id, 'price': price}) + PROJECT(columns={'id': id, 'n_rows': 1:numeric}) + FILTER(condition=LOWER(name) == 'baked apples with cream':string, columns={'id': id}) + SCAN(table=main.Dish, columns={'id': id, 'name': name}) diff --git a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql index 17fb88b4a..743e4da74 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbCustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbTransaction.sbtxamount), 0) AS total_amount FROM main.sbTransaction AS sbTransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbTransaction.sbtxcustid +JOIN main.sbCustomer AS sbCustomer + ON LOWER(sbCustomer.sbcustcountry) = 'usa' + AND sbCustomer.sbcustid = sbTransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbTransaction.sbtxdatetime < CAST(DATE_SUB( + sbTransaction.sbtxdatetime < CAST(DATE_SUB( CURRENT_TIMESTAMP(), INTERVAL ( ( diff --git a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql index 26080ce78..f8c0db2f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - 
LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', CURRENT_TIMESTAMP - CAST(( EXTRACT(DOW FROM CURRENT_TIMESTAMP) + 6 diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql index 0a72bb5a5..eb1b10906 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', DATEADD( DAY, diff --git a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql index b94aec102..f29c3de35 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS 
total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE( + sbtransaction.sbtxdatetime < DATE( 'now', '-' || CAST(( CAST(STRFTIME('%w', DATETIME('now')) AS INTEGER) + 6 diff --git a/tests/test_sql_refsols/menu_5556_ansi.sql b/tests/test_sql_refsols/menu_5556_ansi.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_ansi.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_mysql.sql b/tests/test_sql_refsols/menu_5556_mysql.sql new file mode 100644 index 000000000..0e0ec128a --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_mysql.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.Dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + MenuPage.menu_id, + MAX(MenuItem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.MenuPage AS MenuPage + JOIN main.MenuItem AS MenuItem + ON MenuItem.menu_page_id = MenuPage.id + LEFT JOIN _s3 AS _s3 + ON MenuItem.dish_id = _s3.id + GROUP BY + 1 +) +SELECT + Menu.sponsor +FROM main.Menu AS Menu +JOIN _t1 AS _t1 + ON Menu.id = _t1.menu_id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 
1 diff --git a/tests/test_sql_refsols/menu_5556_postgres.sql b/tests/test_sql_refsols/menu_5556_postgres.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_postgres.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_snowflake.sql b/tests/test_sql_refsols/menu_5556_snowflake.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_snowflake.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_sqlite.sql b/tests/test_sql_refsols/menu_5556_sqlite.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_sqlite.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + 
MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 From 307c60138bf839c8d6660bb81cd59ea59a85352a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:05:59 -0800 Subject: [PATCH 09/12] Resolving bugs and updating tests [RUN CI] --- pydough/conversion/hybrid_filter_merger.py | 2 ++ pydough/conversion/hybrid_translator.py | 3 -- pydough/conversion/hybrid_tree.py | 4 +++ .../conversion/relational_simplification.py | 26 +++++++++++++-- .../database_connectors/database_connector.py | 1 - tests/test_metadata/masked_graphs.json | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- tests/test_plan_refsols/common_prefix_n.txt | 14 ++++---- tests/test_plan_refsols/common_prefix_o.txt | 16 ++++----- tests/test_plan_refsols/correl_14.txt | 2 +- tests/test_plan_refsols/correl_15.txt | 2 +- tests/test_plan_refsols/correl_35.txt | 23 +++++++------ .../count_multiple_filters_a.txt | 4 +-- .../count_multiple_filters_b.txt | 8 ++--- .../count_multiple_filters_c.txt | 4 +-- .../count_multiple_filters_e.txt | 2 +- .../cryptbank_general_join_01_raw.txt | 2 +- .../cryptbank_general_join_01_rewrite.txt | 2 +- .../cryptbank_general_join_02_raw.txt | 2 +- .../cryptbank_general_join_02_rewrite.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 2 +- .../defog_test_functions.py | 6 ++-- tests/test_sql_refsols/correl_14_sqlite.sql | 2 +- tests/test_sql_refsols/correl_15_sqlite.sql | 2 +- tests/test_sql_refsols/correl_31_sqlite.sql | 4 +-- tests/test_sql_refsols/correl_35_sqlite.sql | 33 ++++++------------- .../count_multiple_filters_a_ansi.sql | 2 +- .../count_multiple_filters_a_mysql.sql | 2 +- 
.../count_multiple_filters_a_snowflake.sql | 2 +- .../count_multiple_filters_a_sqlite.sql | 2 +- .../count_multiple_filters_b_ansi.sql | 20 +++++------ .../count_multiple_filters_b_mysql.sql | 20 +++++------ .../count_multiple_filters_b_postgres.sql | 20 +++++------ .../count_multiple_filters_b_snowflake.sql | 20 +++++------ .../count_multiple_filters_b_sqlite.sql | 20 +++++------ .../count_multiple_filters_c_ansi.sql | 20 +++++------ .../count_multiple_filters_c_mysql.sql | 20 +++++------ .../count_multiple_filters_c_snowflake.sql | 22 +++++-------- .../count_multiple_filters_c_sqlite.sql | 20 +++++------ .../count_multiple_filters_e_ansi.sql | 6 ++-- .../count_multiple_filters_e_mysql.sql | 6 ++-- .../count_multiple_filters_e_snowflake.sql | 6 ++-- .../count_multiple_filters_e_sqlite.sql | 6 ++-- .../defog_restaurants_gen11_ansi.sql | 2 +- .../defog_restaurants_gen11_mysql.sql | 2 +- .../defog_restaurants_gen11_snowflake.sql | 2 +- .../defog_restaurants_gen11_sqlite.sql | 2 +- .../defog_restaurants_gen14_ansi.sql | 8 ++++- .../defog_restaurants_gen14_mysql.sql | 8 ++++- .../defog_restaurants_gen14_postgres.sql | 8 ++++- .../defog_restaurants_gen14_snowflake.sql | 8 ++++- .../defog_restaurants_gen14_sqlite.sql | 8 ++++- .../defog_restaurants_gen15_ansi.sql | 2 +- .../defog_restaurants_gen15_mysql.sql | 2 +- .../defog_restaurants_gen15_postgres.sql | 2 +- .../defog_restaurants_gen15_sqlite.sql | 2 +- .../defog_restaurants_gen8_ansi.sql | 2 +- .../defog_restaurants_gen8_mysql.sql | 2 +- .../defog_restaurants_gen8_snowflake.sql | 2 +- .../defog_restaurants_gen8_sqlite.sql | 2 +- 60 files changed, 236 insertions(+), 214 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 1ed9083f0..b6280b219 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -117,6 +117,8 @@ def merge_filters(self, tree: HybridTree) -> None: for operation in tree.pipeline: 
operation.replace_expressions(replacement_map) + tree.remove_dead_children(set()) + # Run the procedure recursively on the parent tree and the child # subtrees. if tree.parent is not None: diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index bf3617aad..21d554f5c 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1746,7 +1746,4 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. self.run_rewrites(hybrid) - # 8. Remove any dead children in the hybrid tree that are no longer - # being used. - hybrid.remove_dead_children(set()) return hybrid diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index aeea1bf38..ab716efb5 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -980,6 +980,10 @@ def remove_dead_children(self, must_remove: set[int]) -> dict[int, int]: ) or self.children[child_idx].connection_type.is_anti: children_to_delete.discard(child_idx) + print() + print(self) + print(children_to_delete) + if len(children_to_delete) == 0: return {i: i for i in range(len(self.children))} diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 45a40cd16..103de3e2d 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -870,8 +870,8 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null or non-negative if their - # first argument is. + # All of these operators are non-null, non-negative, or positive if + # their first argument is. 
case ( pydop.SUM | pydop.AVG @@ -882,8 +882,28 @@ def simplify_function_call( | pydop.QUANTILE ): output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True + not_null=True, + not_negative=True, + positive=True, ) + if expr.op == pydop.SUM: + if ( + isinstance(expr.inputs[0], CallExpression) + and expr.inputs[0].op == pydop.IFF + ): + # SUM(IFF(cond, 1, 0)) -> SUM(cond) + cond_arg: RelationalExpression = expr.inputs[0].inputs[0] + first_arg: RelationalExpression = expr.inputs[0].inputs[1] + second_arg: RelationalExpression = expr.inputs[0].inputs[2] + if ( + isinstance(first_arg, LiteralExpression) + and first_arg.value in (1, 1.0, True) + and isinstance(second_arg, LiteralExpression) + and second_arg.value in (0, 0.0, False) + ): + output_expr = CallExpression( + pydop.SUM, expr.data_type, [cond_arg] + ) # INTEGER(x) -> x if x is a literal integer. Also simplify for # booleans. diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index 303c7581c..b34189442 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -52,7 +52,6 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: try: self.cursor.execute(sql) except Exception as e: - breakpoint() print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") raise pydough.active_session.error_builder.sql_runtime_failure( sql, e, True diff --git a/tests/test_metadata/masked_graphs.json b/tests/test_metadata/masked_graphs.json index 7ff27c758..8f61d3508 100644 --- a/tests/test_metadata/masked_graphs.json +++ b/tests/test_metadata/masked_graphs.json @@ -384,7 +384,7 @@ "original parent": "branches", "original property": "same_state_customers", "singular": false, - "always matches": true, + "always matches": false, "description": "All branches located in the same state as the customer", "synonyms": ["branches in same state", "local branches"] } diff --git 
a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 6c99fbc08..6c4ccc09c 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 2cf16811e..1295aedfe 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', 
DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_agg, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_agg': t1.sum_agg, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_sum_expr, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, 
(l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': SUM(sum_sum_expr)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice, 'sum_sum_expr': t1.sum_sum_expr}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': 
l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index f9f6f44f3..614e51530 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,13 +1,13 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_agg)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) 
> DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t1.sum_sum_agg, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_sum_expr)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': 
t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 'sum_sum_sum_expr': t1.sum_sum_sum_expr}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + FILTER(condition=sum_sum_sum_expr != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': 
sum_sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': SUM(sum_sum_expr)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 125f405ef..22b4f80d8 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 4b184d5bf..7681d553f 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 
'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index 4aaab2b34..af49adab5 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -10,15 +10,14 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=sum_n_rows != 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=YEAR(l_shipdate) == 1997:numeric & ISIN(MONTH(l_shipdate), [1, 2, 3]:array[numeric]), columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 
'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=YEAR(l_shipdate) == 1997:numeric & ISIN(MONTH(l_shipdate), [1, 2, 3]:array[numeric]), columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt index c4182bc18..58fde44d6 100644 --- a/tests/test_plan_refsols/count_multiple_filters_a.txt +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr)], 
orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string)}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 281cd5a3c..3ebaf6592 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_6), ('n4', agg_7), ('n5', agg_8), ('n6', agg_9)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'agg_6': t0.agg_6, 'agg_7': t0.agg_7, 'agg_8': t1.agg_8, 'agg_9': t0.agg_9, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'agg_6': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'agg_7': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'agg_9': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 
600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'agg_8': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index 78d1de0ad..fed82fccd 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2), ('n3', n3), ('n4', n4), ('n5', n5), ('n6', n6)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n3': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric), 1:numeric, 0:numeric)), 'n4': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n5': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n6': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 
'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt index 548c6def4..b67bcbb02 100644 --- a/tests/test_plan_refsols/count_multiple_filters_e.txt +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -6,5 +6,5 @@ ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_r JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(o_orderpriority == '1-URGENT':string, 1:numeric, 0:numeric)), 'sum_expr_21': SUM(IFF(o_orderpriority == '2-HIGH':string, 1:numeric, 0:numeric)), 'sum_expr_22': SUM(IFF(o_orderpriority == '3-MEDIUM':string, 1:numeric, 0:numeric))}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_21': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_22': SUM(o_orderpriority == '3-MEDIUM':string)}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 641e01b1c..59649defb 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -6,7 +6,7 @@ 
ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_lo SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 641e01b1c..59649defb 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -6,7 +6,7 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_lo SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - 
([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt index e79570ab4..2300e583e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt @@ -2,6 +2,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': 
t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt index e79570ab4..2300e583e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt @@ -2,6 +2,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt 
b/tests/test_plan_refsols/semi_aggregate.txt index 6c99fbc08..6c4ccc09c 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_pydough_functions/defog_test_functions.py b/tests/test_pydough_functions/defog_test_functions.py index 8a2770086..600b4700c 100644 --- a/tests/test_pydough_functions/defog_test_functions.py +++ b/tests/test_pydough_functions/defog_test_functions.py @@ -2967,8 +2967,8 @@ def impl_defog_restaurants_gen14(): non-vegan food in San Francisco? 
Match food_type case insensitively """ sf_restaurants = restaurants.WHERE(LOWER(city_name) == "san francisco") - n_vegan = SUM(LOWER(sf_restaurants.food_type) == "vegan") - n_non_vegan = SUM(LOWER(sf_restaurants.food_type) != "vegan") + n_vegan = COUNT(sf_restaurants.WHERE(LOWER(food_type) == "vegan")) + n_non_vegan = COUNT(sf_restaurants) - n_vegan return Restaurants.CALCULATE( ratio=(n_vegan / KEEP_IF(n_non_vegan, n_non_vegan != 0)) ) @@ -2983,7 +2983,7 @@ def impl_defog_restaurants_gen15(): Los Angeles? """ la_restaurants = restaurants.WHERE(LOWER(city_name) == "los angeles") - n_la_italian = SUM(LOWER(la_restaurants.food_type) == "italian") + n_la_italian = COUNT(la_restaurants.WHERE(LOWER(food_type) == "italian")) n_la = COUNT(la_restaurants) return Restaurants.CALCULATE(ratio=(n_la_italian / KEEP_IF(n_la, n_la != 0))) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index cc4c44913..2340e279e 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,7 +1,7 @@ WITH _s4 AS ( SELECT partsupp.ps_suppkey, - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(NOT part.p_retailprice IS NULL) AS sum_expr, SUM(part.p_retailprice) AS sum_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index e93da2ce7..8fbf54a41 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -6,7 +6,7 @@ WITH _s0 AS ( SELECT partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(NOT part.p_retailprice IS NULL) AS sum_expr, SUM(part.p_retailprice) AS sum_p_retailprice FROM _s0 AS _s0 JOIN tpch.supplier AS supplier diff --git a/tests/test_sql_refsols/correl_31_sqlite.sql 
b/tests/test_sql_refsols/correl_31_sqlite.sql index 8376c2ada..69dbc030b 100644 --- a/tests/test_sql_refsols/correl_31_sqlite.sql +++ b/tests/test_sql_refsols/correl_31_sqlite.sql @@ -22,7 +22,7 @@ WITH _t1 AS ( 1 - lineitem.l_discount ) ELSE NULL - END AS expr_2 + END AS expr_7 FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' @@ -46,7 +46,7 @@ SELECT AVG(l_extendedprice * ( 1 - l_discount )) AS mean_rev, - AVG(expr_2) AS median_rev + AVG(expr_7) AS median_rev FROM _t1 GROUP BY n_nationkey diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index 16cab7b6a..fb6b735d7 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -4,12 +4,11 @@ WITH _s1 AS ( p_type FROM tpch.part ), _s10 AS ( - SELECT + SELECT DISTINCT customer.c_custkey, customer.c_nationkey, lineitem.l_partkey, - orders.o_orderpriority, - COUNT(*) AS n_rows + orders.o_orderpriority FROM tpch.customer AS customer JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1997 @@ -18,26 +17,15 @@ WITH _s1 AS ( ON CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND CAST(STRFTIME('%m', lineitem.l_shipdate) AS INTEGER) IN (1, 2, 3) AND lineitem.l_orderkey = orders.o_orderkey - GROUP BY - 1, - 2, - 3, - 4 -), _t3 AS ( - SELECT +), _s13 AS ( + SELECT DISTINCT _s10.c_custkey, _s10.c_nationkey, _s10.o_orderpriority, - _s11.p_type, - SUM(_s10.n_rows) AS sum_n_rows + _s11.p_type FROM _s10 AS _s10 JOIN _s1 AS _s11 ON _s10.l_partkey = _s11.p_partkey - GROUP BY - 1, - 2, - 3, - 4 ) SELECT COUNT(*) AS n @@ -49,11 +37,10 @@ JOIN tpch.supplier AS supplier JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 AND lineitem.l_orderkey = orders.o_orderkey -JOIN _t3 AS _t3 - ON _s1.p_type = _t3.p_type - AND _t3.c_custkey = orders.o_custkey - AND _t3.c_nationkey = 
supplier.s_nationkey - AND _t3.o_orderpriority = orders.o_orderpriority - AND _t3.sum_n_rows <> 0 +JOIN _s13 AS _s13 + ON _s1.p_type = _s13.p_type + AND _s13.c_custkey = orders.o_custkey + AND _s13.c_nationkey = supplier.s_nationkey + AND _s13.o_orderpriority = orders.o_orderpriority WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql index ba0018b62..5a423e7e5 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql index a43c29266..184cf2b26 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.CUSTOMER WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql index 6adc35063..7150be416 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 + COUNT_IF(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql index 
703d56924..5a423e7e5 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql index 92f7c16eb..4f96f56d6 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql index 287a8d2e6..79cd17fa8 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT 
- SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.CUSTOMER WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.CUSTOMER WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql index 92f7c16eb..8ce688027 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_11, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - 
COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql index bc13323f9..a6a0d4dfd 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_7, - SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + COUNT_IF(c_mktsegment = 'BUILDING') AS sum_expr, + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr_11, + COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql index 7cddc1ce8..4f96f56d6 100644 --- 
a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_7, - SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql index 3861720ce..1d410f585 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, - SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - CASE - WHEN c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - 
AND c_phone LIKE '11%' - THEN 1 - ELSE 0 - END + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql index a3e0f4986..2926641ca 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, - SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - CASE - WHEN c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - THEN 1 - ELSE 0 - END + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.CUSTOMER diff --git a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql index 609166553..dc9d70b1d 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, - SUM(IFF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS n4, - SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS n5, - SUM( - IFF( - STARTSWITH(c_phone, '11') - AND c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING', - 1, - 0 - ) + 
COUNT_IF(c_mktsegment = 'BUILDING') AS n2, + COUNT_IF(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + COUNT_IF(STARTSWITH(c_phone, '11')) AS n4, + COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS n5, + COUNT_IF( + STARTSWITH(c_phone, '11') + AND c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql index 8cabf3bfd..1d410f585 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, - SUM(IIF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS n4, - SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - IIF( - c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%', - 1, - 0 - ) + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql index 676f8e1d1..34e9c3fc0 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, - SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + SUM(o_orderpriority = 
'1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql index 440100388..eb63b410b 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, - SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + SUM(o_orderpriority = '1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.ORDERS GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql index 1a13d2d5b..e194c3bd4 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(IFF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, - SUM(IFF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, - SUM(IFF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + COUNT_IF(o_orderpriority = '1-URGENT') AS sum_expr, + COUNT_IF(o_orderpriority = '2-HIGH') AS sum_expr_21, + COUNT_IF(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql index 13f47047c..34e9c3fc0 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql @@ -2,9 +2,9 @@ WITH 
_s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(IIF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, - SUM(IIF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, - SUM(IIF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + SUM(o_orderpriority = '1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql index af28c71cf..cde69334c 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql @@ -1,3 +1,3 @@ SELECT - SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio + SUM(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql index af28c71cf..cde69334c 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql @@ -1,3 +1,3 @@ SELECT - SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio + SUM(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql index 7a6db2f6b..f3e20bfa6 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql @@ -1,3 +1,3 @@ SELECT - SUM(IFF(rating > 4.5, 1, 0)) / COUNT(*) AS ratio + COUNT_IF(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql index b99291f91..121904d46 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql @@ -1,3 +1,3 @@ SELECT - CAST(SUM(IIF(rating > 4.5, 1, 0)) AS REAL) / COUNT(*) AS ratio + CAST(SUM(rating > 4.5) AS REAL) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql index d9975b8d2..586affd62 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql @@ -1,5 +1,11 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + SUM(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql index d9975b8d2..586affd62 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql @@ -1,5 +1,11 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + SUM(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql index 74d49c9a3..9d98b0324 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql @@ -1,5 +1,11 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN 
LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) AS ratio + CAST(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / CASE + WHEN ( + COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) + ) <> 0 + THEN COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql index 9f437c5c6..cfee256b5 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql @@ -1,5 +1,11 @@ SELECT - COUNT_IF(LOWER(food_type) = 'vegan') / NULLIF(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) AS ratio + COUNT_IF(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index 06f824271..a4f7ac562 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -1,5 +1,11 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql index c15bdb8d1..ee1edcfa8 100644 --- 
a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio + SUM(LOWER(food_type) = 'italian') / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql index c15bdb8d1..ee1edcfa8 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio + SUM(LOWER(food_type) = 'italian') / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql index 5016a56a6..aaadabc97 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(COUNT(*), 0) AS ratio + CAST(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql index c5dcfc7fb..ca869e50a 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'italian'), 0) AS REAL) / NULLIF(COUNT(*), 0) AS ratio + CAST(SUM(LOWER(food_type) = 'italian') AS REAL) / NULLIF(COUNT(*), 0) AS ratio 
FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql index cf572383b..3f8169add 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql index d3c25d5a9..48afc6331 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql index de01bfd58..9041e014f 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(IFF(NOT restaurant.rating IS NULL, 1, 0)) AS sum_expr, + COUNT_IF(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql index 2ea3ca06a..f10e4066b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(IIF(NOT restaurant.rating IS NULL, 1, 0)) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 From 87eec09f02a3cd47d2cce30179521948f6522325 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:22:22 -0800 Subject: [PATCH 10/12] Removing old --- tests/test_pipeline_tpch_custom.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index d67037a54..4fbe05137 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2690,19 +2690,6 @@ ), id="quarter_function_test", ), - pytest.param( - PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", - "TPCH", - lambda: pd.DataFrame( - { - "n": [30183], - } - ), - "redundant_has", - ), - id="redundant_has", - ), pytest.param( PyDoughPandasTest( "result = TPCH.CALCULATE(" From 9fe336cf5db5b83c9d597199d5faa2d33197f54f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:51:23 -0800 Subject: [PATCH 11/12] Added extra BIRD tests --- pydough/conversion/hybrid_tree.py | 4 -- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 54 +++++++++++++++++++ tests/test_plan_refsols/donor_3276.txt | 4 ++ tests/test_plan_refsols/movielens_2274.txt | 6 +++ tests/test_sql_refsols/donor_3276_ansi.sql | 7 +++ tests/test_sql_refsols/donor_3276_mysql.sql | 7 +++ .../test_sql_refsols/donor_3276_postgres.sql | 7 +++ .../test_sql_refsols/donor_3276_snowflake.sql | 7 +++ tests/test_sql_refsols/donor_3276_sqlite.sql | 7 +++ .../test_sql_refsols/movielens_2274_ansi.sql | 11 ++++ .../test_sql_refsols/movielens_2274_mysql.sql | 22 ++++++++ .../movielens_2274_postgres.sql | 22 ++++++++ .../movielens_2274_snowflake.sql 
| 18 +++++++ .../movielens_2274_sqlite.sql | 22 ++++++++ 15 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 tests/test_plan_refsols/donor_3276.txt create mode 100644 tests/test_plan_refsols/movielens_2274.txt create mode 100644 tests/test_sql_refsols/donor_3276_ansi.sql create mode 100644 tests/test_sql_refsols/donor_3276_mysql.sql create mode 100644 tests/test_sql_refsols/donor_3276_postgres.sql create mode 100644 tests/test_sql_refsols/donor_3276_snowflake.sql create mode 100644 tests/test_sql_refsols/donor_3276_sqlite.sql create mode 100644 tests/test_sql_refsols/movielens_2274_ansi.sql create mode 100644 tests/test_sql_refsols/movielens_2274_mysql.sql create mode 100644 tests/test_sql_refsols/movielens_2274_postgres.sql create mode 100644 tests/test_sql_refsols/movielens_2274_snowflake.sql create mode 100644 tests/test_sql_refsols/movielens_2274_sqlite.sql diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index ab716efb5..aeea1bf38 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -980,10 +980,6 @@ def remove_dead_children(self, must_remove: set[int]) -> dict[int, int]: ) or self.children[child_idx].connection_type.is_anti: children_to_delete.discard(child_idx) - print() - print(self) - print(children_to_delete) - if len(children_to_delete) == 0: return {i: i for i in range(len(self.children))} diff --git a/tests/conftest.py b/tests/conftest.py index 8334590e0..cdbb7c47f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -682,7 +682,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators", "menu"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu", "donor", "movielens"] """ Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. 
diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 9570029f2..e23be6a79 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -137,6 +137,60 @@ ), id="menu_5556", ), + pytest.param( + PyDoughPandasTest( + """ +result = donor.CALCULATE( + total_sb_projects=COUNT(projects.WHERE(LOWER(school_city) == "santa barbara")), + suburban_sb_projects=COUNT(projects.WHERE( + (LOWER(school_city) == "santa barbara") & + (LOWER(school_metro) == "suburban") + )) +).CALCULATE( + percentage_suburban=100.0 * suburban_sb_projects / total_sb_projects +) + """, + "donor", + lambda: pd.DataFrame( + { + "percentage_suburban": [30.303], + } + ), + "donor_3276", + ), + id="donor_3276", + ), + pytest.param( + PyDoughPandasTest( + """ +result = movielens.CALCULATE( + # Count all users who have at least one rating of 2 + total_users_with_rating_2 = COUNT(users.WHERE( + HAS(submitted_ratings.WHERE(rating == 2)) + )), + # Count female users who have at least one rating of 2 + female_users_with_rating_2 = COUNT(users.WHERE( + HAS(submitted_ratings.WHERE(rating == 2)) & (LOWER(u_gender) == 'f') + )) +).CALCULATE( + # Calculate the percentage, handling division by zero + percentage_of_female_users = IFF( + total_users_with_rating_2 > 0, + 100.0 * female_users_with_rating_2 / total_users_with_rating_2, + 0.0 + ) +) + """, + "movielens", + lambda: pd.DataFrame( + { + "percentage_of_female_users": [27.961], + } + ), + "movielens_2274", + ), + id="movielens_2274", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/donor_3276.txt b/tests/test_plan_refsols/donor_3276.txt new file mode 100644 index 000000000..ca4a91b96 --- /dev/null +++ b/tests/test_plan_refsols/donor_3276.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('percentage_suburban', 100.0:numeric * suburban_sb_projects / n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'suburban_sb_projects': 
SUM(LOWER(school_metro) == 'suburban':string)}) + FILTER(condition=LOWER(school_city) == 'santa barbara':string, columns={'school_metro': school_metro}) + SCAN(table=main.projects, columns={'school_city': school_city, 'school_metro': school_metro}) diff --git a/tests/test_plan_refsols/movielens_2274.txt b/tests/test_plan_refsols/movielens_2274.txt new file mode 100644 index 000000000..f16c77a47 --- /dev/null +++ b/tests/test_plan_refsols/movielens_2274.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('percentage_of_female_users', IFF(n_rows > 0:numeric, 100.0:numeric * sum_expr / n_rows, 0.0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(LOWER(u_gender) == 'f':string)}) + JOIN(condition=t0.userid == t1.userid, type=SEMI, columns={'u_gender': t0.u_gender}) + SCAN(table=main.users, columns={'u_gender': u_gender, 'userid': userid}) + FILTER(condition=rating == 2:numeric, columns={'userid': userid}) + SCAN(table=main.u2base, columns={'rating': rating, 'userid': userid}) diff --git a/tests/test_sql_refsols/donor_3276_ansi.sql b/tests/test_sql_refsols/donor_3276_ansi.sql new file mode 100644 index 000000000..c180985e8 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_ansi.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_mysql.sql b/tests/test_sql_refsols/donor_3276_mysql.sql new file mode 100644 index 000000000..c180985e8 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_mysql.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_postgres.sql b/tests/test_sql_refsols/donor_3276_postgres.sql new file mode 100644 index 000000000..65e0b08d0 --- /dev/null +++ 
b/tests/test_sql_refsols/donor_3276_postgres.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(CASE WHEN LOWER(school_metro) = 'suburban' THEN 1 ELSE 0 END) + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_snowflake.sql b/tests/test_sql_refsols/donor_3276_snowflake.sql new file mode 100644 index 000000000..bd9c447e5 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * COUNT_IF(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_sqlite.sql b/tests/test_sql_refsols/donor_3276_sqlite.sql new file mode 100644 index 000000000..2f303e9d6 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + CAST(( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) AS REAL) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/movielens_2274_ansi.sql b/tests/test_sql_refsols/movielens_2274_ansi.sql new file mode 100644 index 000000000..24541f689 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_ansi.sql @@ -0,0 +1,11 @@ +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * SUM(LOWER(users.u_gender) = 'f') + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +JOIN main.u2base AS u2base + ON u2base.rating = 2 AND u2base.userid = users.userid diff --git a/tests/test_sql_refsols/movielens_2274_mysql.sql b/tests/test_sql_refsols/movielens_2274_mysql.sql new file mode 100644 index 000000000..4dcbd4e83 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_mysql.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * 
SUM(LOWER(users.u_gender) = 'f') + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_postgres.sql b/tests/test_sql_refsols/movielens_2274_postgres.sql new file mode 100644 index 000000000..7d41e8e1d --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_postgres.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * SUM(CASE WHEN LOWER(users.u_gender) = 'f' THEN 1 ELSE 0 END) + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_snowflake.sql b/tests/test_sql_refsols/movielens_2274_snowflake.sql new file mode 100644 index 000000000..a76f7e516 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_snowflake.sql @@ -0,0 +1,18 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + IFF(COUNT(*) > 0, ( + 100.0 * COUNT_IF(LOWER(users.u_gender) = 'f') + ) / COUNT(*), 0.0) AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_sqlite.sql b/tests/test_sql_refsols/movielens_2274_sqlite.sql new file mode 100644 index 000000000..01d32927d --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_sqlite.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + IIF( + COUNT(*) > 0, + CAST(( + 100.0 * SUM(LOWER(users.u_gender) = 'f') + ) AS REAL) / COUNT(*), + 0.0 + ) AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON 
_u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL From 6deed335a225c00e639caae2a6fb100e1df3a34d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Thu, 29 Jan 2026 10:07:45 -0800 Subject: [PATCH 12/12] Updating column bubbling name protocols --- pydough/conversion/column_bubbler.py | 79 ++++++++++++++++--- .../agg_simplification_2.txt | 4 +- .../aggregate_mixed_levels_simple.txt | 4 +- tests/test_plan_refsols/aggregate_semi.txt | 6 +- tests/test_plan_refsols/common_prefix_c.txt | 6 +- tests/test_plan_refsols/common_prefix_h.txt | 6 +- tests/test_plan_refsols/common_prefix_l.txt | 8 +- tests/test_plan_refsols/common_prefix_m.txt | 10 +-- tests/test_plan_refsols/common_prefix_n.txt | 14 ++-- tests/test_plan_refsols/common_prefix_o.txt | 16 ++-- tests/test_plan_refsols/correl_14.txt | 6 +- tests/test_plan_refsols/correl_15.txt | 6 +- .../count_multiple_filters_b.txt | 8 +- .../count_multiple_filters_c.txt | 4 +- .../cryptbank_agg_06_rewrite.txt | 4 +- tests/test_plan_refsols/exponentiation.txt | 5 +- tests/test_plan_refsols/semi_aggregate.txt | 6 +- tests/test_plan_refsols/simplification_3.txt | 4 +- tests/test_plan_refsols/tpch_q11.txt | 8 +- tests/test_plan_refsols/tpch_q6.txt | 4 +- tests/test_sql_refsols/correl_14_sqlite.sql | 6 +- tests/test_sql_refsols/correl_15_sqlite.sql | 6 +- tests/test_sql_refsols/correl_32_sqlite.sql | 52 ++++++------ .../count_multiple_filters_b_ansi.sql | 8 +- .../count_multiple_filters_b_mysql.sql | 8 +- .../count_multiple_filters_b_postgres.sql | 8 +- .../count_multiple_filters_b_snowflake.sql | 8 +- .../count_multiple_filters_b_sqlite.sql | 8 +- .../defog_broker_adv16_ansi.sql | 6 +- .../defog_broker_adv16_mysql.sql | 6 +- .../defog_broker_adv16_postgres.sql | 6 +- .../defog_broker_adv16_snowflake.sql | 6 +- .../defog_broker_adv16_sqlite.sql | 6 +- .../defog_dermtreatment_basic2_ansi.sql | 4 +- .../defog_dermtreatment_basic2_mysql.sql | 4 +- .../defog_dermtreatment_basic2_postgres.sql | 4 +- 
.../defog_dermtreatment_basic2_snowflake.sql | 4 +- .../defog_dermtreatment_basic2_sqlite.sql | 4 +- .../defog_restaurants_gen18_ansi.sql | 4 +- .../defog_restaurants_gen18_mysql.sql | 4 +- .../defog_restaurants_gen18_postgres.sql | 4 +- .../defog_restaurants_gen18_snowflake.sql | 4 +- .../defog_restaurants_gen18_sqlite.sql | 4 +- .../defog_restaurants_gen8_ansi.sql | 4 +- .../defog_restaurants_gen8_mysql.sql | 4 +- .../defog_restaurants_gen8_postgres.sql | 4 +- .../defog_restaurants_gen8_snowflake.sql | 4 +- .../defog_restaurants_gen8_sqlite.sql | 4 +- .../simplification_3_mysql.sql | 12 +-- .../simplification_3_sqlite.sql | 16 ++-- tests/test_sql_refsols/tpch_q11_ansi.sql | 10 +-- tests/test_sql_refsols/tpch_q11_mysql.sql | 10 +-- tests/test_sql_refsols/tpch_q11_postgres.sql | 10 +-- tests/test_sql_refsols/tpch_q11_snowflake.sql | 10 +-- tests/test_sql_refsols/tpch_q11_sqlite.sql | 10 +-- 55 files changed, 272 insertions(+), 208 deletions(-) diff --git a/pydough/conversion/column_bubbler.py b/pydough/conversion/column_bubbler.py index 880c79da9..9dea13b71 100644 --- a/pydough/conversion/column_bubbler.py +++ b/pydough/conversion/column_bubbler.py @@ -9,6 +9,7 @@ import re +import pydough.pydough_operators as pydop from pydough.relational import ( Aggregate, CallExpression, @@ -17,6 +18,7 @@ Filter, Join, Limit, + LiteralExpression, Project, RelationalExpression, RelationalNode, @@ -48,7 +50,63 @@ def name_sort_key(name: str) -> tuple[bool, bool, str]: ) -def generate_cleaner_names(expr: RelationalExpression, current_name: str) -> list[str]: +binop_namings: dict[pydop.PyDoughExpressionOperator, str] = { + pydop.ADD: "plus", + pydop.SUB: "minus", + pydop.MUL: "times", + pydop.DIV: "div", + pydop.MOD: "mod", + pydop.POW: "pow", + pydop.STARTSWITH: "startswith", + pydop.ENDSWITH: "endswith", + pydop.CONTAINS: "contains", +} +""" +TODO +""" + + +def make_cleaner_name(expr: CallExpression) -> str | None: + """ + TODO + """ + input_names: list[str] = [] + arg_name: str 
| None + for arg in expr.inputs: + if isinstance(arg, ColumnReference): + arg_name = arg.name + # Remove any non-alphanumeric characters to make a cleaner name + # and underscores + arg_name = re.sub(r"[^a-zA-Z0-9_]", "", arg_name) + input_names.append(arg_name) + elif isinstance(arg, CallExpression): + arg_name = make_cleaner_name(arg) + if arg_name is None: + return None + input_names.append(arg_name) + elif isinstance(arg, LiteralExpression): + # For literals, use their value directly in the name if it's + # a simple type + if isinstance(arg.value, (str, int, float, bool)): + arg_name = str(arg.value) + arg_name = re.sub(r"[^a-zA-Z0-9_]", "", arg_name) + input_names.append(arg_name) + else: + return None + else: + return None + cleaner_name: str | None = None + if len(expr.inputs) == 1: + cleaner_name = f"{expr.op.function_name.lower()}_{input_names[0]}" + elif len(expr.inputs) == 2 and expr.op in binop_namings: + cleaner_name = f"{input_names[0]}_{binop_namings[expr.op]}_{input_names[1]}" + + if cleaner_name is not None and cleaner_name.isidentifier(): + return cleaner_name + return None + + +def generate_cleaner_names(expr: RelationalExpression, current_name) -> list[str]: """ Generates more readable names for an expression based on its, if applicable. 
The patterns of name generation are: @@ -73,21 +131,18 @@ def generate_cleaner_names(expr: RelationalExpression, current_name: str) -> lis """ result: list[str] = [] if isinstance(expr, CallExpression): - if len(expr.inputs) == 1: - input_expr = expr.inputs[0] - if isinstance(input_expr, ColumnReference): - input_name: str = input_expr.name - # Remove any non-alphanumeric characters to make a cleaner name - # and underscores - input_name = re.sub(r"[^a-zA-Z0-9_]", "", input_name) - cleaner_name: str = f"{expr.op.function_name.lower()}_{input_name}" - - result.append(cleaner_name) + cleaner_name: str | None = make_cleaner_name(expr) + if cleaner_name is not None: + result.append(cleaner_name) if len(expr.inputs) == 0 and expr.op.function_name.lower() == "count": result.append("n_rows") - if not (current_name.startswith("agg") or current_name.startswith("expr")): + if not ( + current_name is None + or current_name.startswith("agg") + or current_name.startswith("expr") + ): if re.match(r"^(.*)_[0-9]+$", current_name): result.append(re.findall(r"^(.*)_[0-9]+$", current_name)[0]) return result diff --git a/tests/test_plan_refsols/agg_simplification_2.txt b/tests/test_plan_refsols/agg_simplification_2.txt index 3a26c2eb6..fc29aa3da 100644 --- a/tests/test_plan_refsols/agg_simplification_2.txt +++ b/tests/test_plan_refsols/agg_simplification_2.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('state', sbCustState), ('a1', ndistinct_sbCustCity), ('a2', sum_n_rows), ('a3', sum_count_expr), ('a4', DEFAULT_TO(sum_sum_integer_sbCustPostalCode, 0:numeric)), ('a5', min_sbCustPhone), ('a6', max_sbCustPhone), ('a7', min_anything_lower_sbCustState), ('a8', min_anything_lower_sbCustState), ('a9', min_anything_lower_sbCustState)], orderings=[(sbCustState):asc_first]) - AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_sbCustPhone': MAX(sbCustPhone), 'min_anything_lower_sbCustState': ANYTHING(LOWER(sbCustState)), 'min_sbCustPhone': MIN(sbCustPhone), 'ndistinct_sbCustCity': 
NDISTINCT(sbCustCity), 'sum_count_expr': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_n_rows': COUNT(), 'sum_sum_integer_sbCustPostalCode': SUM(INTEGER(sbCustPostalCode))}) +ROOT(columns=[('state', sbCustState), ('a1', ndistinct_sbCustCity), ('a2', sum_n_rows), ('a3', sum_count_expr), ('a4', DEFAULT_TO(sum_integer_sbCustPostalCode, 0:numeric)), ('a5', min_sbCustPhone), ('a6', max_sbCustPhone), ('a7', anything_lower_sbCustState), ('a8', anything_lower_sbCustState), ('a9', anything_lower_sbCustState)], orderings=[(sbCustState):asc_first]) + AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'anything_lower_sbCustState': ANYTHING(LOWER(sbCustState)), 'max_sbCustPhone': MAX(sbCustPhone), 'min_sbCustPhone': MIN(sbCustPhone), 'ndistinct_sbCustCity': NDISTINCT(sbCustCity), 'sum_count_expr': COUNT(KEEP_IF(sbCustName, STARTSWITH(LOWER(sbCustName), 'j':string))), 'sum_integer_sbCustPostalCode': SUM(INTEGER(sbCustPostalCode)), 'sum_n_rows': COUNT()}) SCAN(table=main.sbCustomer, columns={'sbCustCity': sbCustCity, 'sbCustName': sbCustName, 'sbCustPhone': sbCustPhone, 'sbCustPostalCode': sbCustPostalCode, 'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt index c48246b61..b43179bc3 100644 --- a/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt +++ b/tests/test_plan_refsols/aggregate_mixed_levels_simple.txt @@ -1,5 +1,5 @@ -ROOT(columns=[('order_key', l_orderkey), ('max_ratio', max_ratio)], orderings=[]) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_ratio': MAX(l_quantity / ps_availqty)}) +ROOT(columns=[('order_key', l_orderkey), ('max_ratio', max_l_quantity_div_ps_availqty)], orderings=[]) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_l_quantity_div_ps_availqty': MAX(l_quantity / ps_availqty)}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_quantity': t0.l_quantity, 'ps_availqty': t1.ps_availqty}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) diff --git a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 6c4ccc09c..c70551a7a 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_present_p_retailprice), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice, 'sum_present_p_retailprice': t1.sum_present_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice), 'sum_present_p_retailprice': 
SUM(PRESENT(p_retailprice))}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_c.txt b/tests/test_plan_refsols/common_prefix_c.txt index 07216ea7f..953f97a20 100644 --- a/tests/test_plan_refsols/common_prefix_c.txt +++ b/tests/test_plan_refsols/common_prefix_c.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_rows), ('n_customers', sum_expr), ('n_suppliers', sum_n_rows), ('n_orders', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n_parts', sum_sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_n_rows, 'sum_n_rows': t1.sum_sum_n_rows_0, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_n_rows, 'sum_n_rows': t1.sum_sum_agg_0_times_agg, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr_15), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_n_rows_0': SUM(sum_n_rows_0)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 
'n_rows': t0.n_rows, 'sum_expr_15': t0.sum_n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_n_rows_0': t1.n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_0_times_agg': SUM(sum_agg_0_times_agg_1), 'sum_sum_expr': SUM(sum_expr_15), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_agg_0_times_agg_1': t1.n_rows, 'sum_expr_15': t0.sum_n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) diff --git a/tests/test_plan_refsols/common_prefix_h.txt b/tests/test_plan_refsols/common_prefix_h.txt index b60ef9b43..a5a918cc8 100644 --- a/tests/test_plan_refsols/common_prefix_h.txt +++ b/tests/test_plan_refsols/common_prefix_h.txt @@ -1,8 +1,8 @@ ROOT(columns=[('name', r_name), ('n_nations', n_rows), ('n_orders', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n_customers', sum_expr), ('n_parts', sum_sum_n_rows), ('n_suppliers', sum_n_rows)], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_n_rows, 'sum_n_rows': t1.sum_sum_n_rows_0, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_n_rows': t1.sum_sum_n_rows}) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_n_rows, 'sum_n_rows': t1.sum_sum_agg_0_times_agg, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_n_rows': t1.sum_sum_n_rows}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr_15), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_n_rows_0': SUM(sum_n_rows_0)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr_15': t0.sum_n_rows, 'sum_n_rows': t1.sum_n_rows, 'sum_n_rows_0': t1.n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_agg_0_times_agg': SUM(sum_agg_0_times_agg_1), 'sum_sum_expr': SUM(sum_expr_15), 'sum_sum_n_rows': SUM(sum_n_rows)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_agg_0_times_agg_1': t1.n_rows, 'sum_expr_15': t0.sum_n_rows, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_n_rows': t1.sum_n_rows}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) diff --git a/tests/test_plan_refsols/common_prefix_l.txt b/tests/test_plan_refsols/common_prefix_l.txt index 7694c7141..53e6ac5ef 100644 --- a/tests/test_plan_refsols/common_prefix_l.txt +++ b/tests/test_plan_refsols/common_prefix_l.txt @@ -1,12 +1,12 @@ 
-ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) +ROOT(columns=[('cust_name', c_name), ('nation_name', n_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(sum_s_acctbal_div_count_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric))], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': t1.sum_s_acctbal_div_count_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, 
type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': t1.sum_s_acctbal_div_count_s_acctbal}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - PROJECT(columns={'avg_s_acctbal': sum_s_acctbal / count_s_acctbal, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_rows': n_rows, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_rows': n_rows, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': sum_s_acctbal / count_s_acctbal}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_acctbal': COUNT(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_m.txt b/tests/test_plan_refsols/common_prefix_m.txt index ba4252b2b..30461ae5e 100644 --- a/tests/test_plan_refsols/common_prefix_m.txt +++ b/tests/test_plan_refsols/common_prefix_m.txt @@ 
-1,10 +1,10 @@ -ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(avg_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) +ROOT(columns=[('cust_name', c_name), ('n_selected_suppliers', DEFAULT_TO(n_rows, 0:numeric)), ('selected_suppliers_min', min_s_acctbal), ('selected_suppliers_max', max_s_acctbal), ('selected_suppliers_avg', ROUND(sum_s_acctbal_div_count_s_acctbal, 2:numeric)), ('selected_suppliers_sum', DEFAULT_TO(sum_s_acctbal, 0:numeric)), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_name': t0.c_name, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t1.n_name, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': t1.sum_s_acctbal_div_count_s_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'avg_s_acctbal': t0.avg_s_acctbal, 'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal}) - JOIN(condition=t0.n_nationkey == 
t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_s_acctbal': t1.avg_s_acctbal, 'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'max_s_acctbal': t0.max_s_acctbal, 'min_s_acctbal': t0.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t0.n_rows, 'sum_s_acctbal': t0.sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': t0.sum_s_acctbal_div_count_s_acctbal}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'max_s_acctbal': t1.max_s_acctbal, 'min_s_acctbal': t1.min_s_acctbal, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_s_acctbal': t1.sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': t1.sum_s_acctbal_div_count_s_acctbal}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - PROJECT(columns={'avg_s_acctbal': sum_s_acctbal / count_s_acctbal, 'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_rows': n_rows, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal}) + PROJECT(columns={'max_s_acctbal': max_s_acctbal, 'min_s_acctbal': min_s_acctbal, 'n_rows': n_rows, 's_nationkey': s_nationkey, 'sum_s_acctbal': sum_s_acctbal, 'sum_s_acctbal_div_count_s_acctbal': sum_s_acctbal / count_s_acctbal}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'count_s_acctbal': COUNT(s_acctbal), 'max_s_acctbal': MAX(s_acctbal), 'min_s_acctbal': MIN(s_acctbal), 'n_rows': COUNT(), 'sum_s_acctbal': SUM(s_acctbal)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'s_acctbal': t0.s_acctbal, 's_nationkey': t0.s_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 1295aedfe..65f494f86 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_sum_expr, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': 
SUM(sum_sum_expr)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice, 'sum_sum_expr': t1.sum_sum_expr}) +ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_p_container_startswith_SM, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': t0.sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_container_startswith_SM': SUM(sum_p_container_startswith_SM), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t1.sum_n_rows, 'sum_p_container_startswith_SM': t1.sum_p_container_startswith_SM, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_container_startswith_SM': t0.sum_p_container_startswith_SM, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_container_startswith_SM': SUM(STARTSWITH(p_container, 'SM':string)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 
'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index 614e51530..7a7578980 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,13 +1,13 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_sum_expr)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 
'sum_sum_sum_expr': t1.sum_sum_sum_expr}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_p_container_startswith_SM)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': t0.sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': t1.sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_sum_expr != 
0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': SUM(sum_sum_expr)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) + FILTER(condition=sum_sum_p_container_startswith_SM != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_container_startswith_SM': sum_sum_p_container_startswith_SM, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_container_startswith_SM': SUM(sum_p_container_startswith_SM), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_container_startswith_SM': t0.sum_p_container_startswith_SM, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_container_startswith_SM': SUM(STARTSWITH(p_container, 'SM':string)), 'sum_p_retailprice': 
SUM(p_retailprice)}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 22b4f80d8..02f5dd2e0 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_present_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == 
t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_present_p_retailprice': t0.sum_present_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_p_retailprice': SUM(p_retailprice), 'sum_present_p_retailprice': SUM(PRESENT(p_retailprice))}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 7681d553f..45d32d1af 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) - JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) - JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, 
aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_present_p_retailprice & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) + JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_present_p_retailprice': t0.sum_present_p_retailprice}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_p_retailprice': SUM(p_retailprice), 'sum_present_p_retailprice': SUM(PRESENT(p_retailprice))}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 3ebaf6592..9617d3c4d 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', 
sum_expr_13), ('n6', sum_expr_12)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_c_phone_startswith_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_c_phone_startswith_11': t0.sum_c_phone_startswith_11, 'sum_expr': t0.sum_expr, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_c_phone_startswith_11}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_c_phone_startswith_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_c_phone_startswith_11': SUM(STARTSWITH(c_phone, '11':string))}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git 
a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index fed82fccd..0855ba0b9 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_c_phone_startswith_11), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_c_phone_startswith_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt index c7c7e9e9a..dc9af38ea 100644 --- a/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_06_rewrite.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('n_neg', sum_expr), ('n_positive', sum_expr_3)], orderings=[]) - 
AGGREGATE(keys={}, aggregations={'sum_expr': SUM(False:bool), 'sum_expr_3': SUM(True:bool)}) +ROOT(columns=[('n_neg', sum_False), ('n_positive', sum_True)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_False': SUM(False:bool), 'sum_True': SUM(True:bool)}) SCAN(table=CRBNK.TRANSACTIONS, columns={}) diff --git a/tests/test_plan_refsols/exponentiation.txt b/tests/test_plan_refsols/exponentiation.txt index ad7f1ae11..c4f3f6524 100644 --- a/tests/test_plan_refsols/exponentiation.txt +++ b/tests/test_plan_refsols/exponentiation.txt @@ -1,2 +1,3 @@ -ROOT(columns=[('low_square', sbDpLow ** 2:numeric), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(sbDpLow ** 2:numeric):asc_first], limit=10:numeric) - SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) +ROOT(columns=[('low_square', sbDpLow ** 2:numeric), ('low_sqrt', SQRT(sbDpLow)), ('low_cbrt', POWER(sbDpLow, 0.3333333333333333:numeric))], orderings=[(sbDpLow ** 2:numeric):asc_first]) + LIMIT(limit=10:numeric, columns={'sbDpLow': sbDpLow}, orderings=[(low_square):asc_first]) + SCAN(table=main.sbDailyPrice, columns={'sbDpLow': sbDpLow}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt b/tests/test_plan_refsols/semi_aggregate.txt index 6c4ccc09c..c70551a7a 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) - JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_present_p_retailprice), 
('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) + JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_p_retailprice': t1.sum_p_retailprice, 'sum_present_p_retailprice': t1.sum_present_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_p_retailprice': SUM(p_retailprice), 'sum_present_p_retailprice': SUM(PRESENT(p_retailprice))}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/simplification_3.txt b/tests/test_plan_refsols/simplification_3.txt index 1cf84762d..5d3caf852 100644 --- a/tests/test_plan_refsols/simplification_3.txt +++ b/tests/test_plan_refsols/simplification_3.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool), ('s10', 91:numeric), ('s11', 0:numeric), ('s12', 42:numeric + 8:numeric), ('s13', 35:numeric), ('s14', 25.0:numeric), ('s15', ABS(n_rows * -0.75:numeric)), ('s16', 10:numeric), ('s17', n_rows), ('s18', n_rows), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', 
True:bool), ('s25', agg_1), ('s26', median_expr), ('s27', min_rank), ('s28', max_rank), ('s29', anything_rsum1), ('s30', ROUND(sum_rsum2, 2:numeric)), ('s31', anything_ravg1), ('s32', ROUND(sum_ravg2, 2:numeric)), ('s33', anything_rcnt1), ('s34', ROUND(sum_rcnt2, 2:numeric)), ('s35', anything_rsiz1), ('s36', ROUND(sum_rsiz2, 2:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'agg_1': QUANTILE(ABS(INTEGER(sbCustPostalCode)), 0.25:numeric), 'anything_ravg1': ANYTHING(ravg1), 'anything_rcnt1': ANYTHING(rcnt1), 'anything_rsiz1': ANYTHING(rsiz1), 'anything_rsum1': ANYTHING(rsum1), 'max_rank': MAX(rank), 'median_expr': MEDIAN(ABS(INTEGER(sbCustPostalCode))), 'min_rank': MIN(rank), 'n_rows': COUNT(), 'sum_ravg2': SUM(ravg2), 'sum_rcnt2': SUM(rcnt2), 'sum_rsiz2': SUM(rsiz2), 'sum_rsum2': SUM(rsum2)}) +ROOT(columns=[('s00', True:bool), ('s01', True:bool), ('s02', False:bool), ('s03', False:bool), ('s04', False:bool), ('s05', False:bool), ('s06', 3:numeric <= n_rows), ('s07', False:bool), ('s08', n_rows <= 6:numeric), ('s09', False:bool), ('s10', 91:numeric), ('s11', 0:numeric), ('s12', 42:numeric + 8:numeric), ('s13', 35:numeric), ('s14', 25.0:numeric), ('s15', ABS(n_rows * -0.75:numeric)), ('s16', 10:numeric), ('s17', n_rows), ('s18', n_rows), ('s19', False:bool), ('s20', True:bool), ('s21', False:bool), ('s22', True:bool), ('s23', False:bool), ('s24', True:bool), ('s25', agg_1), ('s26', median_abs_integer_sbCustPostalCode), ('s27', min_rank), ('s28', max_rank), ('s29', anything_rsum1), ('s30', ROUND(sum_rsum2, 2:numeric)), ('s31', anything_ravg1), ('s32', ROUND(sum_ravg2, 2:numeric)), ('s33', anything_rcnt1), ('s34', ROUND(sum_rcnt2, 2:numeric)), ('s35', anything_rsiz1), ('s36', ROUND(sum_rsiz2, 2:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'agg_1': QUANTILE(ABS(INTEGER(sbCustPostalCode)), 0.25:numeric), 'anything_ravg1': ANYTHING(ravg1), 'anything_rcnt1': ANYTHING(rcnt1), 'anything_rsiz1': ANYTHING(rsiz1), 'anything_rsum1': ANYTHING(rsum1), 
'max_rank': MAX(rank), 'median_abs_integer_sbCustPostalCode': MEDIAN(ABS(INTEGER(sbCustPostalCode))), 'min_rank': MIN(rank), 'n_rows': COUNT(), 'sum_ravg2': SUM(ravg2), 'sum_rcnt2': SUM(rcnt2), 'sum_rsiz2': SUM(rsiz2), 'sum_rsum2': SUM(rsum2)}) PROJECT(columns={'rank': RANKING(args=[], partition=[], order=[(sbCustName):asc_last]), 'ravg1': RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'ravg2': DEFAULT_TO(RELAVG(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], frame=(None, -1)), 0.1:numeric), 'rcnt1': RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[]), 'rcnt2': DEFAULT_TO(RELCOUNT(args=[INTEGER(sbCustPostalCode)], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'rsiz1': RELSIZE(args=[], partition=[], order=[]), 'rsiz2': DEFAULT_TO(RELSIZE(args=[], partition=[], order=[(sbCustName):asc_last], frame=(1, None)), 0.1:numeric), 'rsum1': RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[]), 'rsum2': DEFAULT_TO(RELSUM(args=[ABS(DEFAULT_TO(INTEGER(sbCustPostalCode), 0:numeric))], partition=[], order=[(sbCustName):asc_last], cumulative=True), 0.1:numeric), 'sbCustPostalCode': sbCustPostalCode}) SCAN(table=main.sbCustomer, columns={'sbCustName': sbCustName, 'sbCustPostalCode': sbCustPostalCode}) diff --git a/tests/test_plan_refsols/tpch_q11.txt b/tests/test_plan_refsols/tpch_q11.txt index 64b6fc1eb..0738987c0 100644 --- a/tests/test_plan_refsols/tpch_q11.txt +++ b/tests/test_plan_refsols/tpch_q11.txt @@ -1,13 +1,13 @@ -ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', DEFAULT_TO(sum_expr, 0:numeric))], orderings=[(DEFAULT_TO(sum_expr, 0:numeric)):desc_last], limit=10:numeric) - JOIN(condition=DEFAULT_TO(t1.sum_expr, 0:numeric) > DEFAULT_TO(t0.sum_metric, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': 
t1.ps_partkey, 'sum_expr': t1.sum_expr}) - AGGREGATE(keys={}, aggregations={'sum_metric': SUM(ps_supplycost * ps_availqty)}) +ROOT(columns=[('PS_PARTKEY', ps_partkey), ('VALUE', sum_ps_supplycost_times_ps_availqty)], orderings=[(sum_ps_supplycost_times_ps_availqty):desc_last], limit=10:numeric) + JOIN(condition=DEFAULT_TO(t1.sum_ps_supplycost_times_ps_availqty, 0:numeric) > DEFAULT_TO(t0.sum_ps_supplycost_times_ps_availqty, 0:numeric) * 0.0001:numeric, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_partkey': t1.ps_partkey, 'sum_ps_supplycost_times_ps_availqty': t1.sum_ps_supplycost_times_ps_availqty}) + AGGREGATE(keys={}, aggregations={'sum_ps_supplycost_times_ps_availqty': SUM(ps_supplycost * ps_availqty)}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_expr': SUM(ps_supplycost * ps_availqty)}) + AGGREGATE(keys={'ps_partkey': ps_partkey}, aggregations={'sum_ps_supplycost_times_ps_availqty': SUM(ps_supplycost * ps_availqty)}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost}) SCAN(table=tpch.PARTSUPP, 
columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey, 'ps_supplycost': ps_supplycost}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'s_suppkey': t0.s_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q6.txt b/tests/test_plan_refsols/tpch_q6.txt index 57562829d..2ee188673 100644 --- a/tests/test_plan_refsols/tpch_q6.txt +++ b/tests/test_plan_refsols/tpch_q6.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('REVENUE', DEFAULT_TO(sum_amt, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_amt': SUM(l_extendedprice * l_discount)}) +ROOT(columns=[('REVENUE', DEFAULT_TO(sum_l_extendedprice_times_l_discount, 0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_l_extendedprice_times_l_discount': SUM(l_extendedprice * l_discount)}) FILTER(condition=l_discount <= 0.07:numeric & l_quantity < 24:numeric & l_shipdate < datetime.date(1995, 1, 1):datetime & l_discount >= 0.05:numeric & l_shipdate >= datetime.date(1994, 1, 1):datetime, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_quantity': l_quantity, 'l_shipdate': l_shipdate}) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index 2340e279e..2c8b41e14 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,8 +1,8 @@ WITH _s4 AS ( SELECT partsupp.ps_suppkey, - SUM(NOT part.p_retailprice IS NULL) AS sum_expr, - SUM(part.p_retailprice) AS sum_p_retailprice + SUM(part.p_retailprice) AS sum_p_retailprice, + SUM(NOT part.p_retailprice IS NULL) AS sum_present_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp ON partsupp.ps_suppkey = supplier.s_suppkey @@ -22,7 +22,7 @@ JOIN tpch.part AS part ON part.p_container = 'LG DRUM' AND 
part.p_partkey = partsupp.ps_partkey AND part.p_retailprice < ( - CAST(_s4.sum_p_retailprice AS REAL) / _s4.sum_expr + CAST(_s4.sum_p_retailprice AS REAL) / _s4.sum_present_p_retailprice ) AND part.p_retailprice < ( partsupp.ps_supplycost * 1.5 diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index 8fbf54a41..691f719a3 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -6,8 +6,8 @@ WITH _s0 AS ( SELECT partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - SUM(NOT part.p_retailprice IS NULL) AS sum_expr, - SUM(part.p_retailprice) AS sum_p_retailprice + SUM(part.p_retailprice) AS sum_p_retailprice, + SUM(NOT part.p_retailprice IS NULL) AS sum_present_p_retailprice FROM _s0 AS _s0 JOIN tpch.supplier AS supplier ON supplier.s_acctbal < 1000 AND supplier.s_nationkey = 19 @@ -27,7 +27,7 @@ JOIN tpch.part AS part ON part.p_container = 'LG DRUM' AND part.p_partkey = partsupp.ps_partkey AND part.p_retailprice < ( - CAST(_s6.sum_p_retailprice AS REAL) / _s6.sum_expr + CAST(_s6.sum_p_retailprice AS REAL) / _s6.sum_present_p_retailprice ) AND part.p_retailprice < ( _s6.anything_avg_p_retailprice * 0.85 diff --git a/tests/test_sql_refsols/correl_32_sqlite.sql b/tests/test_sql_refsols/correl_32_sqlite.sql index 14834f8e2..224b38261 100644 --- a/tests/test_sql_refsols/correl_32_sqlite.sql +++ b/tests/test_sql_refsols/correl_32_sqlite.sql @@ -1,4 +1,4 @@ -WITH _t2 AS ( +WITH _t3 AS ( SELECT nation.n_nationkey, supplier.s_phone, @@ -54,31 +54,39 @@ WITH _t2 AS ( ) AS expr_1, n_nationkey, AVG(expr_2) AS avg_expr_2 - FROM _t2 + FROM _t3 GROUP BY 1, 2 +), _t0 AS ( + SELECT + _s5.avg_expr_2 AS avg_expr, + customer.c_acctbal, + customer.c_name + FROM tpch.customer AS customer + JOIN _s5 AS _s5 + ON _s5.expr_1 = SUBSTRING( + customer.c_phone, + CASE + WHEN ( + LENGTH(customer.c_phone) + 0 + ) < 1 + THEN 1 + ELSE ( + LENGTH(customer.c_phone) + 0 + ) + 
END + ) + AND _s5.n_nationkey = customer.c_nationkey + WHERE + customer.c_mktsegment = 'AUTOMOBILE' + ORDER BY + delta + LIMIT 5 ) SELECT - customer.c_name AS customer_name, - ABS(customer.c_acctbal - _s5.avg_expr_2) AS delta -FROM tpch.customer AS customer -JOIN _s5 AS _s5 - ON _s5.expr_1 = SUBSTRING( - customer.c_phone, - CASE - WHEN ( - LENGTH(customer.c_phone) + 0 - ) < 1 - THEN 1 - ELSE ( - LENGTH(customer.c_phone) + 0 - ) - END - ) - AND _s5.n_nationkey = customer.c_nationkey -WHERE - customer.c_mktsegment = 'AUTOMOBILE' + c_name AS customer_name, + ABS(c_acctbal - avg_expr) AS delta +FROM _t0 ORDER BY 2 -LIMIT 5 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql index 4f96f56d6..82403838c 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql @@ -1,8 +1,8 @@ WITH _s0 AS ( SELECT COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11, SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE @@ -10,7 +10,7 @@ WITH _s0 AS ( ), _s1 AS ( SELECT COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11 FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -19,8 +19,8 @@ SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, + _s0.sum_c_phone_startswith_11 AS n4, + _s1.sum_c_phone_startswith_11 AS n5, _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql index 79cd17fa8..e8e697d96 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql @@ -1,8 +1,8 @@ 
WITH _s0 AS ( SELECT COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11, SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.CUSTOMER WHERE @@ -10,7 +10,7 @@ WITH _s0 AS ( ), _s1 AS ( SELECT COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11 FROM tpch.CUSTOMER WHERE c_mktsegment = 'BUILDING' @@ -19,8 +19,8 @@ SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, + _s0.sum_c_phone_startswith_11 AS n4, + _s1.sum_c_phone_startswith_11 AS n5, _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql index 8ce688027..e0004a09d 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql @@ -1,8 +1,8 @@ WITH _s0 AS ( SELECT COUNT(*) AS n_rows, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_c_phone_startswith_11, SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_11, SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_12 FROM tpch.customer WHERE @@ -10,7 +10,7 @@ WITH _s0 AS ( ), _s1 AS ( SELECT COUNT(*) AS n_rows, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_c_phone_startswith_11 FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -19,8 +19,8 @@ SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, + _s0.sum_c_phone_startswith_11 AS n4, + _s1.sum_c_phone_startswith_11 AS n5, _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 
AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql index a6a0d4dfd..bbd4d8345 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql @@ -1,8 +1,8 @@ WITH _s0 AS ( SELECT COUNT(*) AS n_rows, + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_c_phone_startswith_11, COUNT_IF(c_mktsegment = 'BUILDING') AS sum_expr, - COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr_11, COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS sum_expr_12 FROM tpch.customer WHERE @@ -10,7 +10,7 @@ WITH _s0 AS ( ), _s1 AS ( SELECT COUNT(*) AS n_rows, - COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_c_phone_startswith_11 FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -19,8 +19,8 @@ SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, + _s0.sum_c_phone_startswith_11 AS n4, + _s1.sum_c_phone_startswith_11 AS n5, _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql index 4f96f56d6..82403838c 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql @@ -1,8 +1,8 @@ WITH _s0 AS ( SELECT COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11, SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE @@ -10,7 +10,7 @@ WITH _s0 AS ( ), _s1 AS ( SELECT COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr + SUM(c_phone LIKE '11%') AS sum_c_phone_startswith_11 FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -19,8 +19,8 @@ SELECT _s0.n_rows AS 
n1, _s1.n_rows AS n2, _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, + _s0.sum_c_phone_startswith_11 AS n4, + _s1.sum_c_phone_startswith_11 AS n5, _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql index 4a786e59f..7e4ccf5b4 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_ansi.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_ansi.sql @@ -1,8 +1,8 @@ WITH _s1 AS ( SELECT sbtxtickerid, - SUM(sbtxtax + sbtxcommission) AS sum_expr, - SUM(sbtxamount) AS sum_sbtxamount + SUM(sbtxamount) AS sum_sbtxamount, + SUM(sbtxtax + sbtxcommission) AS sum_sbtxtax_plus_sbtxcommission FROM main.sbtransaction WHERE sbtxdatetime >= DATE_SUB(CURRENT_TIMESTAMP(), 1, MONTH) AND sbtxtype = 'sell' @@ -13,7 +13,7 @@ SELECT sbticker.sbtickersymbol AS symbol, ( 100.0 * ( - COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr, 0) + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_sbtxtax_plus_sbtxcommission, 0) ) ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker diff --git a/tests/test_sql_refsols/defog_broker_adv16_mysql.sql b/tests/test_sql_refsols/defog_broker_adv16_mysql.sql index 0d511eeac..83d2b6cf4 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_mysql.sql @@ -1,8 +1,8 @@ WITH _s1 AS ( SELECT sbtxtickerid AS sbTxTickerId, - SUM(sbtxtax + sbtxcommission) AS sum_expr, - SUM(sbtxamount) AS sum_sbTxAmount + SUM(sbtxamount) AS sum_sbTxAmount, + SUM(sbtxtax + sbtxcommission) AS sum_sbTxTax_plus_sbTxCommission FROM main.sbTransaction WHERE sbtxdatetime >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL '1' MONTH) @@ -14,7 +14,7 @@ SELECT sbTicker.sbtickersymbol COLLATE utf8mb4_bin AS symbol, ( 100.0 * ( - COALESCE(_s1.sum_sbTxAmount, 0) - COALESCE(_s1.sum_expr, 0) + COALESCE(_s1.sum_sbTxAmount, 0) - COALESCE(_s1.sum_sbTxTax_plus_sbTxCommission, 0) ) ) / 
COALESCE(_s1.sum_sbTxAmount, 0) AS SPM FROM main.sbTicker AS sbTicker diff --git a/tests/test_sql_refsols/defog_broker_adv16_postgres.sql b/tests/test_sql_refsols/defog_broker_adv16_postgres.sql index 746680110..882850626 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_postgres.sql @@ -1,8 +1,8 @@ WITH _s1 AS ( SELECT sbtxtickerid, - SUM(sbtxtax + sbtxcommission) AS sum_expr, - SUM(sbtxamount) AS sum_sbtxamount + SUM(sbtxamount) AS sum_sbtxamount, + SUM(sbtxtax + sbtxcommission) AS sum_sbtxtax_plus_sbtxcommission FROM main.sbtransaction WHERE sbtxdatetime >= CURRENT_TIMESTAMP - INTERVAL '1 MONTH' AND sbtxtype = 'sell' @@ -13,7 +13,7 @@ SELECT sbticker.sbtickersymbol AS symbol, ( 100.0 * ( - COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr, 0) + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_sbtxtax_plus_sbtxcommission, 0) ) ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker diff --git a/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql index 860d653b2..83a4d7546 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql @@ -1,8 +1,8 @@ WITH _s1 AS ( SELECT sbtxtickerid, - SUM(sbtxtax + sbtxcommission) AS sum_expr, - SUM(sbtxamount) AS sum_sbtxamount + SUM(sbtxamount) AS sum_sbtxamount, + SUM(sbtxtax + sbtxcommission) AS sum_sbtxtax_plus_sbtxcommission FROM main.sbtransaction WHERE sbtxdatetime >= DATEADD(MONTH, -1, CAST(CONVERT_TIMEZONE('UTC', CURRENT_TIMESTAMP()) AS TIMESTAMPNTZ)) @@ -14,7 +14,7 @@ SELECT sbticker.sbtickersymbol AS symbol, ( 100.0 * ( - COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr, 0) + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_sbtxtax_plus_sbtxcommission, 0) ) ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker diff --git 
a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql index 3b9294311..d2c2ea3da 100644 --- a/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv16_sqlite.sql @@ -1,8 +1,8 @@ WITH _s1 AS ( SELECT sbtxtickerid, - SUM(sbtxtax + sbtxcommission) AS sum_expr, - SUM(sbtxamount) AS sum_sbtxamount + SUM(sbtxamount) AS sum_sbtxamount, + SUM(sbtxtax + sbtxcommission) AS sum_sbtxtax_plus_sbtxcommission FROM main.sbtransaction WHERE sbtxdatetime >= DATETIME('now', '-1 month') AND sbtxtype = 'sell' @@ -13,7 +13,7 @@ SELECT sbticker.sbtickersymbol AS symbol, CAST(( 100.0 * ( - COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr, 0) + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_sbtxtax_plus_sbtxcommission, 0) ) ) AS REAL) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM FROM main.sbticker AS sbticker diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_ansi.sql b/tests/test_sql_refsols/defog_dermtreatment_basic2_ansi.sql index 212edfb9c..2512d056f 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic2_ansi.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_ansi.sql @@ -39,7 +39,7 @@ WITH _t2 AS ( 1 ), _s11 AS ( SELECT - SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score, _s7.ins_type FROM _t2 AS _t6 JOIN _t3 AS _t7 @@ -54,7 +54,7 @@ WITH _t2 AS ( SELECT _s10.ins_type AS insurance_type, _s10.ndistinct_patient_id AS num_distinct_patients, - _s11.avg_day100_pasi_score AS avg_pasi_score_day100 + _s11.sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score AS avg_pasi_score_day100 FROM _s10 AS _s10 LEFT JOIN _s11 AS _s11 ON _s10.ins_type = _s11.ins_type diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_mysql.sql b/tests/test_sql_refsols/defog_dermtreatment_basic2_mysql.sql 
index 64d3bfe81..b3a1f5691 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic2_mysql.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_mysql.sql @@ -53,7 +53,7 @@ WITH _t2 AS ( 1 ), _s11 AS ( SELECT - SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score, _s7.ins_type FROM _t2 AS _t6 LEFT JOIN _u_2 AS _u_2 @@ -70,7 +70,7 @@ WITH _t2 AS ( SELECT _s10.ins_type AS insurance_type, _s10.ndistinct_patient_id AS num_distinct_patients, - _s11.avg_day100_pasi_score AS avg_pasi_score_day100 + _s11.sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score AS avg_pasi_score_day100 FROM _s10 AS _s10 LEFT JOIN _s11 AS _s11 ON _s10.ins_type = _s11.ins_type diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_postgres.sql b/tests/test_sql_refsols/defog_dermtreatment_basic2_postgres.sql index 85a5286ba..586221585 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic2_postgres.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_postgres.sql @@ -53,7 +53,7 @@ WITH _t2 AS ( 1 ), _s11 AS ( SELECT - CAST(SUM(_s9.sum_day100_pasi_score) AS DOUBLE PRECISION) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + CAST(SUM(_s9.sum_day100_pasi_score) AS DOUBLE PRECISION) / SUM(_s9.count_day100_pasi_score) AS sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score, _s7.ins_type FROM _t2 AS _t6 LEFT JOIN _u_2 AS _u_2 @@ -70,7 +70,7 @@ WITH _t2 AS ( SELECT _s10.ins_type AS insurance_type, _s10.ndistinct_patient_id AS num_distinct_patients, - _s11.avg_day100_pasi_score AS avg_pasi_score_day100 + _s11.sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score AS avg_pasi_score_day100 FROM _s10 AS _s10 LEFT JOIN _s11 AS _s11 ON _s10.ins_type = _s11.ins_type diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_snowflake.sql 
b/tests/test_sql_refsols/defog_dermtreatment_basic2_snowflake.sql index 6c70b2a49..ae9db645a 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic2_snowflake.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_snowflake.sql @@ -53,7 +53,7 @@ WITH _t2 AS ( 1 ), _s11 AS ( SELECT - SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + SUM(_s9.sum_day100_pasi_score) / SUM(_s9.count_day100_pasi_score) AS sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score, _s7.ins_type FROM _t2 AS _t6 LEFT JOIN _u_2 AS _u_2 @@ -70,7 +70,7 @@ WITH _t2 AS ( SELECT _s10.ins_type AS insurance_type, _s10.ndistinct_patient_id AS num_distinct_patients, - _s11.avg_day100_pasi_score AS avg_pasi_score_day100 + _s11.sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score AS avg_pasi_score_day100 FROM _s10 AS _s10 LEFT JOIN _s11 AS _s11 ON _s10.ins_type = _s11.ins_type diff --git a/tests/test_sql_refsols/defog_dermtreatment_basic2_sqlite.sql b/tests/test_sql_refsols/defog_dermtreatment_basic2_sqlite.sql index c6b98ad18..a872eba44 100644 --- a/tests/test_sql_refsols/defog_dermtreatment_basic2_sqlite.sql +++ b/tests/test_sql_refsols/defog_dermtreatment_basic2_sqlite.sql @@ -53,7 +53,7 @@ WITH _t2 AS ( 1 ), _s11 AS ( SELECT - CAST(SUM(_s9.sum_day100_pasi_score) AS REAL) / SUM(_s9.count_day100_pasi_score) AS avg_day100_pasi_score, + CAST(SUM(_s9.sum_day100_pasi_score) AS REAL) / SUM(_s9.count_day100_pasi_score) AS sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score, _s7.ins_type FROM _t2 AS _t6 LEFT JOIN _u_2 AS _u_2 @@ -70,7 +70,7 @@ WITH _t2 AS ( SELECT _s10.ins_type AS insurance_type, _s10.ndistinct_patient_id AS num_distinct_patients, - _s11.avg_day100_pasi_score AS avg_pasi_score_day100 + _s11.sum_sum_day100_pasi_score_div_sum_count_day100_pasi_score AS avg_pasi_score_day100 FROM _s10 AS _s10 LEFT JOIN _s11 AS _s11 ON _s10.ins_type = _s11.ins_type diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_ansi.sql 
b/tests/test_sql_refsols/defog_restaurants_gen18_ansi.sql index c12293c26..74b826b19 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen18_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen18_ansi.sql @@ -23,7 +23,7 @@ WITH _s0 AS ( 1 ), _s7 AS ( SELECT - SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS avg_rating, + SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS sum_sum_rating_div_sum_count_rating, _s2.region FROM _s0 AS _s2 JOIN _s1 AS _s3 @@ -35,7 +35,7 @@ WITH _s0 AS ( ) SELECT _s6.region AS rest_region, - _s7.avg_rating + _s7.sum_sum_rating_div_sum_count_rating AS avg_rating FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen18_mysql.sql index 01b828b6a..93d8c632d 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen18_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen18_mysql.sql @@ -37,7 +37,7 @@ WITH _s0 AS ( 1 ), _s7 AS ( SELECT - SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS avg_rating, + SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS sum_sum_rating_div_sum_count_rating, _s2.region FROM _s0 AS _s2 LEFT JOIN _u_2 AS _u_2 @@ -51,7 +51,7 @@ WITH _s0 AS ( ) SELECT _s6.region COLLATE utf8mb4_bin AS rest_region, - _s7.avg_rating + _s7.sum_sum_rating_div_sum_count_rating AS avg_rating FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen18_postgres.sql index 28dbff8cb..508e6e291 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen18_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen18_postgres.sql @@ -37,7 +37,7 @@ WITH _s0 AS ( 1 ), _s7 AS ( SELECT - CAST(SUM(_s5.sum_rating) AS DOUBLE PRECISION) / SUM(_s5.count_rating) AS avg_rating, + CAST(SUM(_s5.sum_rating) AS DOUBLE PRECISION) / SUM(_s5.count_rating) AS sum_sum_rating_div_sum_count_rating, _s2.region FROM 
_s0 AS _s2 LEFT JOIN _u_2 AS _u_2 @@ -51,7 +51,7 @@ WITH _s0 AS ( ) SELECT _s6.region AS rest_region, - _s7.avg_rating + _s7.sum_sum_rating_div_sum_count_rating AS avg_rating FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen18_snowflake.sql index 9d8d7fb77..f42f14429 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen18_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen18_snowflake.sql @@ -37,7 +37,7 @@ WITH _s0 AS ( 1 ), _s7 AS ( SELECT - SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS avg_rating, + SUM(_s5.sum_rating) / SUM(_s5.count_rating) AS sum_sum_rating_div_sum_count_rating, _s2.region FROM _s0 AS _s2 LEFT JOIN _u_2 AS _u_2 @@ -51,7 +51,7 @@ WITH _s0 AS ( ) SELECT _s6.region AS rest_region, - _s7.avg_rating + _s7.sum_sum_rating_div_sum_count_rating AS avg_rating FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen18_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen18_sqlite.sql index 373d62e71..208f788ab 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen18_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen18_sqlite.sql @@ -37,7 +37,7 @@ WITH _s0 AS ( 1 ), _s7 AS ( SELECT - CAST(SUM(_s5.sum_rating) AS REAL) / SUM(_s5.count_rating) AS avg_rating, + CAST(SUM(_s5.sum_rating) AS REAL) / SUM(_s5.count_rating) AS sum_sum_rating_div_sum_count_rating, _s2.region FROM _s0 AS _s2 LEFT JOIN _u_2 AS _u_2 @@ -51,7 +51,7 @@ WITH _s0 AS ( ) SELECT _s6.region AS rest_region, - _s7.avg_rating + _s7.sum_sum_rating_div_sum_count_rating AS avg_rating FROM _s6 AS _s6 LEFT JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql index 3f8169add..40cc13165 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql +++ 
b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(NOT restaurant.rating IS NULL) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_present_rating, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 @@ -24,7 +24,7 @@ WITH _s1 AS ( ) SELECT _s6.region AS region_name, - _s7.sum_rating / _s7.sum_expr AS avg_rating + _s7.sum_rating / _s7.sum_present_rating AS avg_rating FROM _s6 AS _s6 JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql index 48afc6331..d2c06edcf 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(NOT restaurant.rating IS NULL) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_present_rating, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 @@ -24,7 +24,7 @@ WITH _s1 AS ( ) SELECT _s6.region COLLATE utf8mb4_bin AS region_name, - _s7.sum_rating / _s7.sum_expr AS avg_rating + _s7.sum_rating / _s7.sum_present_rating AS avg_rating FROM _s6 AS _s6 JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen8_postgres.sql index 8c6994569..9ee6da687 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_postgres.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_present_rating, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 @@ -24,7 +24,7 @@ WITH _s1 AS ( ) SELECT 
_s6.region AS region_name, - CAST(_s7.sum_rating AS DOUBLE PRECISION) / _s7.sum_expr AS avg_rating + CAST(_s7.sum_rating AS DOUBLE PRECISION) / _s7.sum_present_rating AS avg_rating FROM _s6 AS _s6 JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql index 9041e014f..13f641fa9 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - COUNT_IF(NOT restaurant.rating IS NULL) AS sum_expr, + COUNT_IF(NOT restaurant.rating IS NULL) AS sum_present_rating, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 @@ -24,7 +24,7 @@ WITH _s1 AS ( ) SELECT _s6.region AS region_name, - _s7.sum_rating / _s7.sum_expr AS avg_rating + _s7.sum_rating / _s7.sum_present_rating AS avg_rating FROM _s6 AS _s6 JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql index f10e4066b..b20deb2b7 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(NOT restaurant.rating IS NULL) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_present_rating, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 @@ -24,7 +24,7 @@ WITH _s1 AS ( ) SELECT _s6.region AS region_name, - CAST(_s7.sum_rating AS REAL) / _s7.sum_expr AS avg_rating + CAST(_s7.sum_rating AS REAL) / _s7.sum_present_rating AS avg_rating FROM _s6 AS _s6 JOIN _s7 AS _s7 ON _s6.region = _s7.region diff --git a/tests/test_sql_refsols/simplification_3_mysql.sql b/tests/test_sql_refsols/simplification_3_mysql.sql index cf625ef8e..b629662d9 100644 
--- a/tests/test_sql_refsols/simplification_3_mysql.sql +++ b/tests/test_sql_refsols/simplification_3_mysql.sql @@ -1,6 +1,6 @@ WITH _t2 AS ( SELECT - ABS(TRUNCATE(CAST(sbcustpostalcode AS FLOAT), 0)) AS expr_13, + ABS(TRUNCATE(CAST(sbcustpostalcode AS FLOAT), 0)) AS abs_integer_sbCustPostalCode, ROW_NUMBER() OVER (ORDER BY CASE WHEN sbcustname COLLATE utf8mb4_bin IS NULL THEN 1 ELSE 0 END, sbcustname COLLATE utf8mb4_bin) AS `rank`, AVG(CAST(ABS(COALESCE(TRUNCATE(CAST(sbcustpostalcode AS FLOAT), 0), 0)) AS DOUBLE)) OVER () AS ravg1, COALESCE( @@ -35,21 +35,21 @@ WITH _t2 AS ( rsum1, rsum2, CASE - WHEN FLOOR(0.75 * COUNT(expr_13) OVER ()) < ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - THEN expr_13 + WHEN FLOOR(0.75 * COUNT(abs_integer_sbCustPostalCode) OVER ()) < ROW_NUMBER() OVER (ORDER BY abs_integer_sbCustPostalCode DESC) + THEN abs_integer_sbCustPostalCode ELSE NULL END AS expr_15, CASE WHEN ABS( ( - ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - 1.0 + ROW_NUMBER() OVER (ORDER BY abs_integer_sbCustPostalCode DESC) - 1.0 ) - ( ( - COUNT(expr_13) OVER () - 1.0 + COUNT(abs_integer_sbCustPostalCode) OVER () - 1.0 ) / 2.0 ) ) < 1.0 - THEN expr_13 + THEN abs_integer_sbCustPostalCode ELSE NULL END AS expr_16 FROM _t2 diff --git a/tests/test_sql_refsols/simplification_3_sqlite.sql b/tests/test_sql_refsols/simplification_3_sqlite.sql index 39a10da8d..1d8947f2e 100644 --- a/tests/test_sql_refsols/simplification_3_sqlite.sql +++ b/tests/test_sql_refsols/simplification_3_sqlite.sql @@ -1,6 +1,6 @@ WITH _t2 AS ( SELECT - ABS(CAST(sbcustpostalcode AS INTEGER)) AS expr_13, + ABS(CAST(sbcustpostalcode AS INTEGER)) AS abs_integer_sbcustpostalcode, ROW_NUMBER() OVER (ORDER BY sbcustname) AS rank, AVG(CAST(ABS(COALESCE(CAST(sbcustpostalcode AS INTEGER), 0)) AS REAL)) OVER () AS ravg1, COALESCE( @@ -36,26 +36,26 @@ WITH _t2 AS ( rsum2, CASE WHEN ( - CAST(0.75 * COUNT(expr_13) OVER () AS INTEGER) - CASE - WHEN 0.75 * COUNT(expr_13) OVER () < CAST(0.75 * COUNT(expr_13) OVER () AS 
INTEGER) + CAST(0.75 * COUNT(abs_integer_sbcustpostalcode) OVER () AS INTEGER) - CASE + WHEN 0.75 * COUNT(abs_integer_sbcustpostalcode) OVER () < CAST(0.75 * COUNT(abs_integer_sbcustpostalcode) OVER () AS INTEGER) THEN 1 ELSE 0 END - ) < ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - THEN expr_13 + ) < ROW_NUMBER() OVER (ORDER BY abs_integer_sbcustpostalcode DESC) + THEN abs_integer_sbcustpostalcode ELSE NULL END AS expr_15, CASE WHEN ABS( ( - ROW_NUMBER() OVER (ORDER BY expr_13 DESC) - 1.0 + ROW_NUMBER() OVER (ORDER BY abs_integer_sbcustpostalcode DESC) - 1.0 ) - ( CAST(( - COUNT(expr_13) OVER () - 1.0 + COUNT(abs_integer_sbcustpostalcode) OVER () - 1.0 ) AS REAL) / 2.0 ) ) < 1.0 - THEN expr_13 + THEN abs_integer_sbcustpostalcode ELSE NULL END AS expr_16 FROM _t2 diff --git a/tests/test_sql_refsols/tpch_q11_ansi.sql b/tests/test_sql_refsols/tpch_q11_ansi.sql index c4a081cb0..8d7e918bf 100644 --- a/tests/test_sql_refsols/tpch_q11_ansi.sql +++ b/tests/test_sql_refsols/tpch_q11_ansi.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -21,7 +21,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT partsupp.ps_partkey, - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey @@ -32,12 +32,12 @@ WITH _s0 AS ( ) SELECT _s9.ps_partkey AS PS_PARTKEY, - COALESCE(_s9.sum_expr, 0) AS VALUE + _s9.sum_ps_supplycost_times_ps_availqty AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr, 0) + COALESCE(_s8.sum_ps_supplycost_times_ps_availqty, 0) * 0.0001 + ) < 
COALESCE(_s9.sum_ps_supplycost_times_ps_availqty, 0) ORDER BY 2 DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_mysql.sql b/tests/test_sql_refsols/tpch_q11_mysql.sql index ef6ce3739..a9afba0c8 100644 --- a/tests/test_sql_refsols/tpch_q11_mysql.sql +++ b/tests/test_sql_refsols/tpch_q11_mysql.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - SUM(PARTSUPP.ps_supplycost * PARTSUPP.ps_availqty) AS sum_metric + SUM(PARTSUPP.ps_supplycost * PARTSUPP.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.PARTSUPP AS PARTSUPP JOIN _s0 AS _s0 ON PARTSUPP.ps_suppkey = _s0.s_suppkey @@ -21,7 +21,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT PARTSUPP.ps_partkey, - SUM(PARTSUPP.ps_supplycost * PARTSUPP.ps_availqty) AS sum_expr + SUM(PARTSUPP.ps_supplycost * PARTSUPP.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.PARTSUPP AS PARTSUPP JOIN _s0 AS _s4 ON PARTSUPP.ps_suppkey = _s4.s_suppkey @@ -32,12 +32,12 @@ WITH _s0 AS ( ) SELECT _s9.ps_partkey AS PS_PARTKEY, - COALESCE(_s9.sum_expr, 0) AS VALUE + _s9.sum_ps_supplycost_times_ps_availqty AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr, 0) + COALESCE(_s8.sum_ps_supplycost_times_ps_availqty, 0) * 0.0001 + ) < COALESCE(_s9.sum_ps_supplycost_times_ps_availqty, 0) ORDER BY 2 DESC LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_postgres.sql b/tests/test_sql_refsols/tpch_q11_postgres.sql index a670863f5..4dcb89451 100644 --- a/tests/test_sql_refsols/tpch_q11_postgres.sql +++ b/tests/test_sql_refsols/tpch_q11_postgres.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -21,7 +21,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT partsupp.ps_partkey, - 
SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey @@ -32,12 +32,12 @@ WITH _s0 AS ( ) SELECT _s9.ps_partkey AS PS_PARTKEY, - COALESCE(_s9.sum_expr, 0) AS VALUE + _s9.sum_ps_supplycost_times_ps_availqty AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr, 0) + COALESCE(_s8.sum_ps_supplycost_times_ps_availqty, 0) * 0.0001 + ) < COALESCE(_s9.sum_ps_supplycost_times_ps_availqty, 0) ORDER BY 2 DESC NULLS LAST LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q11_snowflake.sql b/tests/test_sql_refsols/tpch_q11_snowflake.sql index a670863f5..4dcb89451 100644 --- a/tests/test_sql_refsols/tpch_q11_snowflake.sql +++ b/tests/test_sql_refsols/tpch_q11_snowflake.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -21,7 +21,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT partsupp.ps_partkey, - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey @@ -32,12 +32,12 @@ WITH _s0 AS ( ) SELECT _s9.ps_partkey AS PS_PARTKEY, - COALESCE(_s9.sum_expr, 0) AS VALUE + _s9.sum_ps_supplycost_times_ps_availqty AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr, 0) + COALESCE(_s8.sum_ps_supplycost_times_ps_availqty, 0) * 0.0001 + ) < COALESCE(_s9.sum_ps_supplycost_times_ps_availqty, 0) ORDER BY 2 DESC NULLS LAST LIMIT 10 diff --git 
a/tests/test_sql_refsols/tpch_q11_sqlite.sql b/tests/test_sql_refsols/tpch_q11_sqlite.sql index c4a081cb0..8d7e918bf 100644 --- a/tests/test_sql_refsols/tpch_q11_sqlite.sql +++ b/tests/test_sql_refsols/tpch_q11_sqlite.sql @@ -12,7 +12,7 @@ WITH _s0 AS ( n_name = 'GERMANY' ), _s8 AS ( SELECT - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s0 ON _s0.s_suppkey = partsupp.ps_suppkey @@ -21,7 +21,7 @@ WITH _s0 AS ( ), _s9 AS ( SELECT partsupp.ps_partkey, - SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_ps_supplycost_times_ps_availqty FROM tpch.partsupp AS partsupp JOIN _s0 AS _s4 ON _s4.s_suppkey = partsupp.ps_suppkey @@ -32,12 +32,12 @@ WITH _s0 AS ( ) SELECT _s9.ps_partkey AS PS_PARTKEY, - COALESCE(_s9.sum_expr, 0) AS VALUE + _s9.sum_ps_supplycost_times_ps_availqty AS VALUE FROM _s8 AS _s8 JOIN _s9 AS _s9 ON ( - COALESCE(_s8.sum_metric, 0) * 0.0001 - ) < COALESCE(_s9.sum_expr, 0) + COALESCE(_s8.sum_ps_supplycost_times_ps_availqty, 0) * 0.0001 + ) < COALESCE(_s9.sum_ps_supplycost_times_ps_availqty, 0) ORDER BY 2 DESC LIMIT 10