From 96dc914a2aea1c6aa5d8c0dc59028b1c7f2bebd5 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 23 Jan 2026 02:41:24 -0800 Subject: [PATCH 01/22] Added redundant HAS removal optimization and updated tests, WIP handling the MENU tests due to dataset/metadata issues --- pydough/conversion/relational_converter.py | 8 +++++- .../database_connectors/database_connector.py | 1 + pydough/unqualified/qualification.py | 3 +- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 24 ++++++++++++++++ tests/test_pipeline_tpch_custom.py | 13 +++++++++ tests/test_plan_refsols/common_prefix_ab.txt | 4 +-- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/menu_5556.txt | 12 ++++++++ tests/test_plan_refsols/redundant_has.txt | 8 ++++++ .../supplier_pct_national_qty.txt | 2 +- .../defog_broker_adv8_mysql.sql | 17 +++-------- .../defog_broker_adv8_postgres.sql | 17 +++-------- .../defog_broker_adv8_snowflake.sql | 17 +++-------- .../defog_broker_adv8_sqlite.sql | 17 +++-------- tests/test_sql_refsols/menu_5556_ansi.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_mysql.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_postgres.sql | 28 +++++++++++++++++++ .../test_sql_refsols/menu_5556_snowflake.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_sqlite.sql | 28 +++++++++++++++++++ 20 files changed, 228 insertions(+), 59 deletions(-) create mode 100644 tests/test_plan_refsols/menu_5556.txt create mode 100644 tests/test_plan_refsols/redundant_has.txt create mode 100644 tests/test_sql_refsols/menu_5556_ansi.sql create mode 100644 tests/test_sql_refsols/menu_5556_mysql.sql create mode 100644 tests/test_sql_refsols/menu_5556_postgres.sql create mode 100644 tests/test_sql_refsols/menu_5556_snowflake.sql create mode 100644 tests/test_sql_refsols/menu_5556_sqlite.sql diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c67d4117d..c86d0bec1 100644 --- 
a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -731,10 +731,16 @@ def handle_children( child_output = self.apply_aggregations( child, child_output, child.subtree.agg_keys ) + join_type: JoinType = child.connection_type.join_type + # Semi-joins on singular subtrees can be promoted to + # inner joins to avoid unnecessary complexity and + # improve performance. + if join_type == JoinType.SEMI and child.subtree.is_singular(): + join_type = JoinType.INNER context = self.join_outputs( context, child_output, - child.connection_type.join_type, + join_type, cardinality, child.reverse_cardinality, join_keys, diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index b34189442..303c7581c 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -52,6 +52,7 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: try: self.cursor.execute(sql) except Exception as e: + breakpoint() print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") raise pydough.active_session.error_builder.sql_runtime_failure( sql, e, True diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index c1566b8a6..e6e643cb5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -595,7 +595,8 @@ def qualify_access( ) if ( - isinstance(qualified_parent, GlobalContext) + isinstance(unqualified_parent, UnqualifiedRoot) + and isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name and not is_child ) or ( diff --git a/tests/conftest.py b/tests/conftest.py index 21d433ccd..2df9a6916 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -690,7 +690,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu"] """ 
Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index d8b24a183..3ddba4fbf 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -113,6 +113,30 @@ ), id="wdi_albania_footnotes_1978", ), + pytest.param( + PyDoughPandasTest( + """ +result = menu.menu.WHERE( + HAS(menupages.menuitems.dish.WHERE(LOWER(name) == "baked apples with cream")) +).CALCULATE( + sponsor_name=sponsor, + max_item_price=MAX(menupages.menuitems.price) +).TOP_K( + 1, by=max_item_price.DESC() +).CALCULATE( + sponsor=sponsor_name +) + """, + "menu", + lambda: pd.DataFrame( + { + "sponsor": ["foo"], + } + ), + "menu_5556", + ), + id="menu_5556", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index ddb9a7433..3379d4c53 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2681,6 +2681,19 @@ ), id="quarter_function_test", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [30183], + } + ), + "redundant_has", + ), + id="redundant_has", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 08996397b..93550a02f 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 05678bd68..946eb0b41 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/menu_5556.txt b/tests/test_plan_refsols/menu_5556.txt new file mode 100644 index 
000000000..1b56e06ba --- /dev/null +++ b/tests/test_plan_refsols/menu_5556.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('sponsor', sponsor)], orderings=[(max_price):desc_last], limit=1:numeric) + JOIN(condition=t0.id == t1.menu_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_price': t1.max_price, 'sponsor': t0.sponsor}) + SCAN(table=main.Menu, columns={'id': id, 'sponsor': sponsor}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_price': max_price, 'menu_id': menu_id}) + AGGREGATE(keys={'menu_id': menu_id}, aggregations={'max_price': MAX(price), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.dish_id == t1.id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'menu_id': t0.menu_id, 'n_rows': t1.n_rows, 'price': t0.price}) + JOIN(condition=t0.id == t1.menu_page_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'dish_id': t1.dish_id, 'menu_id': t0.menu_id, 'price': t1.price}) + SCAN(table=main.MenuPage, columns={'id': id, 'menu_id': menu_id}) + SCAN(table=main.MenuItem, columns={'dish_id': dish_id, 'menu_page_id': menu_page_id, 'price': price}) + PROJECT(columns={'id': id, 'n_rows': 1:numeric}) + FILTER(condition=LOWER(name) == 'baked apples with cream':string, columns={'id': id}) + SCAN(table=main.Dish, columns={'id': id, 'name': name}) diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt new file mode 100644 index 000000000..ab1dc1a3f --- /dev/null +++ b/tests/test_plan_refsols/redundant_has.txt @@ -0,0 +1,8 @@ +gROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/supplier_pct_national_qty.txt b/tests/test_plan_refsols/supplier_pct_national_qty.txt index d3a24fe65..86074e8ec 100644 --- a/tests/test_plan_refsols/supplier_pct_national_qty.txt +++ b/tests/test_plan_refsols/supplier_pct_national_qty.txt @@ -2,7 +2,7 @@ ROOT(columns=[('supplier_name', anything_s_name), ('nation_name', anything_n_nam AGGREGATE(keys={'l_suppkey': s_suppkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_s_name': ANYTHING(s_name), 'anything_s_nationkey': ANYTHING(s_nationkey), 'sum_l_quantity': SUM(l_quantity)}) JOIN(condition=t0.s_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 's_name': t1.s_name, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 
'r_regionkey': r_regionkey}) diff --git a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql index 17fb88b4a..743e4da74 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbCustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbTransaction.sbtxamount), 0) AS total_amount FROM main.sbTransaction AS sbTransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbTransaction.sbtxcustid +JOIN main.sbCustomer AS sbCustomer + ON LOWER(sbCustomer.sbcustcountry) = 'usa' + AND sbCustomer.sbcustid = sbTransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbTransaction.sbtxdatetime < CAST(DATE_SUB( + sbTransaction.sbtxdatetime < CAST(DATE_SUB( CURRENT_TIMESTAMP(), INTERVAL ( ( diff --git a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql index 26080ce78..f8c0db2f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', CURRENT_TIMESTAMP - CAST(( EXTRACT(DOW FROM CURRENT_TIMESTAMP) + 6 diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql 
b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql index 37f29683c..712d5a34e 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', DATEADD( DAY, diff --git a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql index b94aec102..f29c3de35 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE( + sbtransaction.sbtxdatetime < DATE( 'now', '-' || CAST(( CAST(STRFTIME('%w', DATETIME('now')) AS INTEGER) + 6 diff --git a/tests/test_sql_refsols/menu_5556_ansi.sql b/tests/test_sql_refsols/menu_5556_ansi.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_ansi.sql @@ -0,0 
+1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_mysql.sql b/tests/test_sql_refsols/menu_5556_mysql.sql new file mode 100644 index 000000000..0e0ec128a --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_mysql.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.Dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + MenuPage.menu_id, + MAX(MenuItem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.MenuPage AS MenuPage + JOIN main.MenuItem AS MenuItem + ON MenuItem.menu_page_id = MenuPage.id + LEFT JOIN _s3 AS _s3 + ON MenuItem.dish_id = _s3.id + GROUP BY + 1 +) +SELECT + Menu.sponsor +FROM main.Menu AS Menu +JOIN _t1 AS _t1 + ON Menu.id = _t1.menu_id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_postgres.sql b/tests/test_sql_refsols/menu_5556_postgres.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_postgres.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + 
menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_snowflake.sql b/tests/test_sql_refsols/menu_5556_snowflake.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_snowflake.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_sqlite.sql b/tests/test_sql_refsols/menu_5556_sqlite.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_sqlite.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 From 03630d36b76b586787150308d35c3a33f153a9c8 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 26 Jan 2026 10:39:58 -0800 Subject: [PATCH 02/22] Working on filter merging program --- pydough/conversion/hybrid_filter_merger.py | 192 
++++++++++++++++++ pydough/conversion/hybrid_translator.py | 26 ++- tests/test_pipeline_s3_datasets.py | 2 +- tests/test_pipeline_tpch_custom.py | 67 ++++++ .../count_multiple_filters_a.txt | 8 + .../count_multiple_filters_b.txt | 24 +++ .../count_multiple_filters_c.txt | 23 +++ 7 files changed, 339 insertions(+), 3 deletions(-) create mode 100644 pydough/conversion/hybrid_filter_merger.py create mode 100644 tests/test_plan_refsols/count_multiple_filters_a.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_b.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_c.txt diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py new file mode 100644 index 000000000..fedf8ccb0 --- /dev/null +++ b/pydough/conversion/hybrid_filter_merger.py @@ -0,0 +1,192 @@ +""" +Logic to merge multiple subtrees in the hybrid tree into one if they are the +same except one of them has more filters than the other and is only used in +a COUNT aggregation, meaning the filter can be implemented by doing a SUM on +the less-filtered subtree where the SUM argument is the additional filters. +""" + +import copy + +import pydough.pydough_operators as pydop +from pydough.qdag import Literal +from pydough.types import BooleanType, NumericType + +from .hybrid_connection import ConnectionType +from .hybrid_expressions import ( + HybridExpr, + HybridFunctionExpr, + HybridLiteralExpr, +) +from .hybrid_operations import ( + HybridCalculate, + HybridFilter, + HybridLimit, +) +from .hybrid_tree import HybridTree + + +class HybridFilterMerger: + """ + TODO + """ + + def merge_filters(self, tree: HybridTree) -> None: + """ + TODO + """ + # Run the main procedure on subtrees with multiple children. + if len(tree.children) > 1: + # Identify which children are only used by a COUNT aggregation that is + # not ONLY_MATCH. 
+ mergeable_children: set[int] = self.identify_mergeable_children(tree) + + child_filters: list[set[HybridExpr]] = [ + self.get_final_filters(child.subtree) for child in tree.children + ] + + child_isomorphisms: list[set[int]] = self.get_child_isomorphisms(tree) + + filter_dag: list[int | None] = self.make_filter_dag( + mergeable_children, child_filters, child_isomorphisms + ) + + print() + print(tree) + print(mergeable_children) + print(child_filters) + print(child_isomorphisms) + print(filter_dag) + + for source_idx, target_idx in enumerate(filter_dag): + if target_idx is None: + continue + print(source_idx, "->", target_idx) + extra_filters: set[HybridExpr] = ( + child_filters[source_idx] - child_filters[target_idx] + ) + print(extra_filters) + assert len(extra_filters) > 0 + new_cond: HybridExpr + if len(extra_filters) == 1: + new_cond = next(iter(extra_filters)) + else: + new_cond = HybridFunctionExpr( + pydop.BAN, + sorted(extra_filters, key=repr), + BooleanType(), + ) + numeric_expr: HybridExpr = HybridFunctionExpr( + pydop.IFF, + [ + new_cond, + HybridLiteralExpr(Literal(1, NumericType())), + HybridLiteralExpr(Literal(0, NumericType())), + ], + NumericType(), + ) + sum_expr: HybridExpr = HybridFunctionExpr( + pydop.SUM, + [numeric_expr], + BooleanType(), + ) + print(sum_expr) + # agg_name: str = tree.gen_agg_name(tree.children[target_idx]) + + # Run the procedure recursively on the parent tree and the child + # subtrees. 
+ if tree.parent is not None: + self.merge_filters(tree.parent) + for child in tree.children: + self.merge_filters(child.subtree) + + def identify_mergeable_children(self, tree: HybridTree) -> set[int]: + """ + TODO + """ + return { + idx + for idx, child in enumerate(tree.children) + if ( + child.connection_type == ConnectionType.AGGREGATION + and {repr(v) for v in child.aggs.values()} == {"COUNT()"} + ) + } + + def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: + """ + TODO + """ + result: set[HybridExpr] = set() + for operation in reversed(tree.pipeline): + if isinstance(operation, HybridFilter): + result.update(operation.condition.get_conjunction()) + if operation.condition.contains_window_functions(): + break + elif isinstance(operation, HybridLimit): + break + elif isinstance(operation, HybridCalculate): + if any( + expr.contains_window_functions() + for expr in operation.new_expressions.values() + ): + break + return result + + def get_child_isomorphisms(self, tree: HybridTree) -> list[set[int]]: + """ + TODO + """ + filter_stripped_forms: list[str] = [ + self.get_filter_stripped_form(child.subtree) for child in tree.children + ] + result: list[set[int]] = [] + for i, form in enumerate(filter_stripped_forms): + alternatives: set[int] = set() + for j, other_form in enumerate(filter_stripped_forms): + if i != j and form == other_form: + alternatives.add(j) + result.append(alternatives) + return result + + def get_filter_stripped_form(self, tree: HybridTree) -> str: + """ + TODO + """ + stripped_tree = copy.deepcopy(tree) + for idx, operation in reversed(list(enumerate(stripped_tree.pipeline))): + if isinstance(operation, HybridFilter): + stripped_tree.pipeline.pop(idx) + if operation.condition.contains_window_functions(): + break + elif isinstance(operation, HybridLimit): + break + elif isinstance(operation, HybridCalculate): + if any( + expr.contains_window_functions() + for expr in operation.new_expressions.values() + ): + break + return 
repr(stripped_tree) + + def make_filter_dag( + self, + mergeable_children: set[int], + child_filters: list[set[HybridExpr]], + child_isomorphisms: list[set[int]], + ) -> list[int | None]: + """ + TODO + """ + dag: list[int | None] = [None for _ in range(len(child_filters))] + for idx in mergeable_children: + for other_idx in child_isomorphisms[idx]: + if child_filters[other_idx] < child_filters[idx]: + dag[idx] = other_idx + break + for idx in range(len(dag)): + if dag[idx] is not None: + while True: + target_idx: int | None = dag[idx] + if target_idx is not None: + dag[idx] = dag[target_idx] + return dag diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index c8e1e1617..c9cc27488 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -64,6 +64,7 @@ HybridSidedRefExpr, HybridWindowExpr, ) +from .hybrid_filter_merger import HybridFilterMerger from .hybrid_operations import ( HybridCalculate, HybridCollectionAccess, @@ -1680,6 +1681,19 @@ def run_hybrid_decorrelation(self, hybrid: "HybridTree") -> None: decorr.find_correlated_children(hybrid) decorr.decorrelate_hybrid_tree(hybrid) + def run_filter_merging(self, hybrid: "HybridTree") -> None: + """ + Invokes the procedure to merge identical child subtrees in the hybrid + tree if they are identical except for the filters they have, which can + be emulated via a SUM on a predicate. The transformation is done + in-place. + + Args: + `hybrid`: The hybrid tree to run filter merging on. + """ + filter_merger: HybridFilterMerger = HybridFilterMerger() + filter_merger.merge_filters(hybrid) + def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: """ Convert a PyDough QDAG node to a hybrid tree, including any necessary @@ -1704,10 +1718,18 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. 
self.run_hybrid_decorrelation(hybrid) - # 6. Run any final rewrites, such as turning MEDIAN into an average + print() + print("BEFORE FILTER MERGING") + print(hybrid) + # 5. Run the filter-merging procedure. + self.run_filter_merging(hybrid) + print() + print("AFTER FILTER MERGING") + print(hybrid) + # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. self.run_rewrites(hybrid) - # 7. Remove any dead children in the hybrid tree that are no longer + # 8. Remove any dead children in the hybrid tree that are no longer # being used. hybrid.remove_dead_children(set()) return hybrid diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 3ddba4fbf..0e73a5ec8 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -130,7 +130,7 @@ "menu", lambda: pd.DataFrame( { - "sponsor": ["foo"], + "sponsor": ["MURRAY HILL HOTEL"], } ), "menu_5556", diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 3379d4c53..82827565f 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2694,6 +2694,73 @@ ), id="redundant_has", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n2=COUNT(customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [268], + } + ), + "count_multiple_filters_a", + ), + id="count_multiple_filters_a", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n2=COUNT(customers.WHERE(market_segment == 'BUILDING')), " + " n3=COUNT(customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + " n4=COUNT(customers.WHERE(MONOTONIC(500, 
account_balance, 600) & STARTSWITH(phone, '11'))), " + " n5=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + " n6=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600) & STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [30142], + "n3": [268], + "n4": [54], + "n5": [1261], + "n6": [19], + } + ), + "count_multiple_filters_b", + ), + id="count_multiple_filters_b", + ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(" + " n1=COUNT(customers), " + " n2=COUNT(customers.WHERE(market_segment == 'BUILDING')), " + " n3=COUNT(customers.WHERE(MONOTONIC(500, account_balance, 600))), " + " n4=COUNT(customers.WHERE(STARTSWITH(phone, '11'))), " + " n5=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING'))), " + " n6=COUNT(customers.WHERE(STARTSWITH(phone, '11') & (market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [150000], + "n2": [30142], + "n3": [1379], + "n4": [5975], + "n5": [1261], + "n6": [19], + } + ), + "count_multiple_filters_c", + ), + id="count_multiple_filters_c", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt new file mode 100644 index 000000000..88351383e --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': 
COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt new file mode 100644 index 000000000..47f23d552 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -0,0 +1,24 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': 
COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt new file mode 100644 index 000000000..9d21f4e0c --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -0,0 +1,23 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': 
t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + SCAN(table=tpch.CUSTOMER, columns={}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) From 97600a1571ce9ec76c029304664d2d75f967bee9 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 26 Jan 2026 12:07:35 -0800 Subject: [PATCH 03/22] Fixed edge case with children steps, updated test files, need to add comments --- pydough/conversion/hybrid_filter_merger.py | 56 ++++++++++++++----- pydough/conversion/hybrid_translator.py | 8 +-- tests/test_plan_refsols/common_prefix_n.txt | 28 
+++++----- tests/test_plan_refsols/common_prefix_o.txt | 30 +++++----- .../count_multiple_filters_a.txt | 12 ++-- .../count_multiple_filters_b.txt | 30 +++------- .../count_multiple_filters_c.txt | 26 +-------- tests/test_plan_refsols/redundant_has.txt | 2 +- .../defog_restaurants_gen11_ansi.sql | 16 +----- .../defog_restaurants_gen11_mysql.sql | 16 +----- .../defog_restaurants_gen11_postgres.sql | 16 +----- .../defog_restaurants_gen11_snowflake.sql | 16 +----- .../defog_restaurants_gen11_sqlite.sql | 16 +----- 13 files changed, 93 insertions(+), 179 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index fedf8ccb0..5b424258e 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -6,6 +6,7 @@ """ import copy +from typing import TYPE_CHECKING import pydough.pydough_operators as pydop from pydough.qdag import Literal @@ -13,6 +14,7 @@ from .hybrid_connection import ConnectionType from .hybrid_expressions import ( + HybridChildRefExpr, HybridExpr, HybridFunctionExpr, HybridLiteralExpr, @@ -24,12 +26,18 @@ ) from .hybrid_tree import HybridTree +if TYPE_CHECKING: + from .hybrid_translator import HybridTranslator + class HybridFilterMerger: """ TODO """ + def __init__(self, translator: "HybridTranslator") -> None: + self.translator: HybridTranslator = translator + def merge_filters(self, tree: HybridTree) -> None: """ TODO @@ -40,31 +48,27 @@ def merge_filters(self, tree: HybridTree) -> None: # not ONLY_MATCH. 
mergeable_children: set[int] = self.identify_mergeable_children(tree) + # TODO ADD COMMENT child_filters: list[set[HybridExpr]] = [ self.get_final_filters(child.subtree) for child in tree.children ] + # TODO ADD COMMENT child_isomorphisms: list[set[int]] = self.get_child_isomorphisms(tree) + # TODO ADD COMMENT filter_dag: list[int | None] = self.make_filter_dag( mergeable_children, child_filters, child_isomorphisms ) - print() - print(tree) - print(mergeable_children) - print(child_filters) - print(child_isomorphisms) - print(filter_dag) - + # TODO ADD COMMENT + replacement_map: dict[HybridExpr, HybridExpr] = {} for source_idx, target_idx in enumerate(filter_dag): if target_idx is None: continue - print(source_idx, "->", target_idx) extra_filters: set[HybridExpr] = ( child_filters[source_idx] - child_filters[target_idx] ) - print(extra_filters) assert len(extra_filters) > 0 new_cond: HybridExpr if len(extra_filters) == 1: @@ -84,13 +88,34 @@ def merge_filters(self, tree: HybridTree) -> None: ], NumericType(), ) - sum_expr: HybridExpr = HybridFunctionExpr( + sum_expr: HybridFunctionExpr = HybridFunctionExpr( pydop.SUM, [numeric_expr], - BooleanType(), + NumericType(), + ) + agg_name: str = self.translator.gen_agg_name(tree.children[target_idx]) + tree.children[target_idx].aggs[agg_name] = sum_expr + agg_ref: HybridExpr = HybridChildRefExpr( + agg_name, target_idx, NumericType() ) - print(sum_expr) - # agg_name: str = tree.gen_agg_name(tree.children[target_idx]) + old_agg_ref = HybridChildRefExpr( + next(iter(tree.children[source_idx].aggs)), + source_idx, + NumericType(), + ) + replacement_map[old_agg_ref] = agg_ref + tree.children[target_idx].max_steps = min( + tree.children[target_idx].max_steps, + tree.children[source_idx].max_steps, + ) + tree.children[target_idx].min_steps = min( + tree.children[target_idx].min_steps, + tree.children[source_idx].min_steps, + ) + + # TODO ADD COMMENT + for operation in tree.pipeline: + 
operation.replace_expressions(replacement_map) # Run the procedure recursively on the parent tree and the child # subtrees. @@ -187,6 +212,7 @@ def make_filter_dag( if dag[idx] is not None: while True: target_idx: int | None = dag[idx] - if target_idx is not None: - dag[idx] = dag[target_idx] + if target_idx is None or dag[target_idx] is None: + break + dag[idx] = dag[target_idx] return dag diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index c9cc27488..22f4adb29 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1691,7 +1691,7 @@ def run_filter_merging(self, hybrid: "HybridTree") -> None: Args: `hybrid`: The hybrid tree to run filter merging on. """ - filter_merger: HybridFilterMerger = HybridFilterMerger() + filter_merger: HybridFilterMerger = HybridFilterMerger(self) filter_merger.merge_filters(hybrid) def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: @@ -1718,14 +1718,8 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. self.run_hybrid_decorrelation(hybrid) - print() - print("BEFORE FILTER MERGING") - print(hybrid) # 5. Run the filter-merging procedure. self.run_filter_merging(hybrid) - print() - print("AFTER FILTER MERGING") - print(hybrid) # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. 
self.run_rewrites(hybrid) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 0ac7dc610..2cf16811e 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,20 +1,18 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'n_rows': n_rows, 'ndistinct_n_name': ndistinct_n_name, 'sum_n_rows': sum_n_rows, 'sum_p_retailprice': sum_p_retailprice}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'n_rows': t0.n_rows, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'n_rows': t1.n_rows, 'o_orderdate': t0.o_orderdate, 'p_retailprice': t1.p_retailprice, 's_acctbal': t1.s_acctbal}) +ROOT(columns=[('key', l_orderkey), ('order_date', 
anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_agg, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_agg': t1.sum_agg, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': 
o_orderkey}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_rows': t1.n_rows, 'p_retailprice': t0.p_retailprice, 's_acctbal': t0.s_acctbal}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'p_retailprice': t0.p_retailprice, 's_acctbal': t1.s_acctbal}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'p_retailprice': t1.p_retailprice}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) - SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) - PROJECT(columns={'n_rows': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': 
SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_nationkey': t1.s_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index fe0307f7b..f9f6f44f3 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,22 +1,18 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', 
sum_sum_sum_n_rows)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_n_rows': t0.sum_sum_sum_n_rows}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 'sum_sum_sum_n_rows': t1.sum_sum_sum_n_rows}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_agg)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + 
JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t1.sum_sum_agg, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_sum_n_rows != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_n_rows': sum_sum_sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_n_rows': SUM(sum_sum_n_rows)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_n_rows': t0.sum_sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_n_rows': SUM(sum_n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_retailprice': t1.p_retailprice, 'sum_n_rows': t0.sum_n_rows}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == t1.p_partkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t1.n_rows}) - FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) - PROJECT(columns={'n_rows': 1:numeric, 'p_partkey': p_partkey}) - FILTER(condition=STARTSWITH(p_container, 'SM':string), columns={'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) + FILTER(condition=sum_sum_agg != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) + 
AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate, 'l_suppkey': l_suppkey}) + SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_suppkey': s_suppkey}) AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'ndistinct_n_name': NDISTINCT(n_name)}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'n_name': t1.n_name}) diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt index 88351383e..c4182bc18 100644 --- a/tests/test_plan_refsols/count_multiple_filters_a.txt +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -1,8 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) +ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 47f23d552..281cd5a3c 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,24 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': 
t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_6), ('n4', agg_7), ('n5', agg_8), ('n6', agg_9)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'agg_6': t0.agg_6, 'agg_7': t0.agg_7, 'agg_8': t1.agg_8, 'agg_9': t0.agg_9, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'agg_6': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'agg_7': 
SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'agg_9': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'agg_8': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index 9d21f4e0c..78d1de0ad 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,23 +1,3 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - SCAN(table=tpch.CUSTOMER, columns={}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) +ROOT(columns=[('n1', n_rows), ('n2', n2), ('n3', n3), ('n4', n4), ('n5', n5), ('n6', n6)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n3': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric), 1:numeric, 0:numeric)), 'n4': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n5': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n6': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + 
SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt index ab1dc1a3f..dafed22e4 100644 --- a/tests/test_plan_refsols/redundant_has.txt +++ b/tests/test_plan_refsols/redundant_has.txt @@ -1,4 +1,4 @@ -gROOT(columns=[('n', n_rows)], orderings=[]) +ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql index 832005452..af28c71cf 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql index 832005452..af28c71cf 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio +FROM main.restaurant diff --git 
a/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql index bf0e704bf..048ba9823 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_postgres.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - CAST(_s0.n_rows AS DOUBLE PRECISION) / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + CAST(SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql index 832005452..7a6db2f6b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - _s0.n_rows / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + SUM(IFF(rating > 4.5, 1, 0)) / COUNT(*) AS ratio +FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql index 4b5cce5d6..b99291f91 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql @@ -1,15 +1,3 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant - WHERE - rating > 4.5 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM main.restaurant -) SELECT - CAST(_s0.n_rows AS REAL) / _s1.n_rows AS ratio -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 + CAST(SUM(IIF(rating > 4.5, 1, 0)) AS REAL) / COUNT(*) AS ratio +FROM main.restaurant From 
f2db22244f59e9bbd6434f16de55f8d7cb86fbbf Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 27 Jan 2026 10:42:38 -0800 Subject: [PATCH 04/22] Resolving test issues [RUN CI] --- tests/test_masked_sqlite.py | 6 +---- .../count_multiple_filters_a_ansi.sql | 6 +++++ .../count_multiple_filters_a_mysql.sql | 6 +++++ .../count_multiple_filters_a_postgres.sql | 6 +++++ .../count_multiple_filters_a_snowflake.sql | 6 +++++ .../count_multiple_filters_a_sqlite.sql | 6 +++++ .../count_multiple_filters_b_ansi.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_mysql.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_postgres.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_snowflake.sql | 26 +++++++++++++++++++ .../count_multiple_filters_b_sqlite.sql | 26 +++++++++++++++++++ .../count_multiple_filters_c_ansi.sql | 17 ++++++++++++ .../count_multiple_filters_c_mysql.sql | 17 ++++++++++++ .../count_multiple_filters_c_postgres.sql | 17 ++++++++++++ .../count_multiple_filters_c_snowflake.sql | 17 ++++++++++++ .../count_multiple_filters_c_sqlite.sql | 17 ++++++++++++ tests/test_sql_refsols/redundant_has_ansi.sql | 7 +++++ .../test_sql_refsols/redundant_has_mysql.sql | 7 +++++ .../redundant_has_postgres.sql | 7 +++++ .../redundant_has_snowflake.sql | 7 +++++ .../test_sql_refsols/redundant_has_sqlite.sql | 7 +++++ 21 files changed, 281 insertions(+), 5 deletions(-) create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_mysql.sql create mode 100644 
tests/test_sql_refsols/count_multiple_filters_b_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_sqlite.sql diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 731aa14c9..536cf78c6 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1553,10 +1553,6 @@ def test_pipeline_e2e_cryptbank( + ")", [ { - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'o']", - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", @@ -1565,8 +1561,8 @@ def test_pipeline_e2e_cryptbank( "DRY_RUN", }, { - "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, 
'__col__', 'a']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", diff --git a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql new file mode 100644 index 000000000..ba0018b62 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql new file mode 100644 index 000000000..a43c29266 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.CUSTOMER +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql new file mode 100644 index 000000000..ba0018b62 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql new file mode 100644 index 000000000..6adc35063 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff 
--git a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql new file mode 100644 index 000000000..703d56924 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS n1, + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 +FROM tpch.customer +WHERE + c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql new file mode 100644 index 000000000..92f7c16eb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql new file mode 100644 index 000000000..287a8d2e6 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.CUSTOMER + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + 
SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.CUSTOMER + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql new file mode 100644 index 000000000..92f7c16eb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql new file mode 100644 index 000000000..bc13323f9 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_7, + SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 
'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql new file mode 100644 index 000000000..7cddc1ce8 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql @@ -0,0 +1,26 @@ +WITH _s0 AS ( + SELECT + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_7, + SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS agg_9, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_acctbal <= 600 AND c_acctbal >= 500 +), _s1 AS ( + SELECT + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_8, + COUNT(*) AS n_rows + FROM tpch.customer + WHERE + c_mktsegment = 'BUILDING' +) +SELECT + _s0.n_rows AS n1, + _s1.n_rows AS n2, + _s0.agg_6 AS n3, + _s0.agg_7 AS n4, + _s1.agg_8 AS n5, + _s0.agg_9 AS n6 +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql new file mode 100644 index 000000000..3861720ce --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql new file mode 100644 index 
000000000..a3e0f4986 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.CUSTOMER diff --git a/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql new file mode 100644 index 000000000..3861720ce --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, + SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM( + CASE + WHEN c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' + THEN 1 + ELSE 0 + END + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql new file mode 100644 index 000000000..609166553 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, + SUM(IFF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, + SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS n4, + SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS n5, + SUM( + IFF( + STARTSWITH(c_phone, '11') + 
AND c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING', + 1, + 0 + ) + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql new file mode 100644 index 000000000..8cabf3bfd --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql @@ -0,0 +1,17 @@ +SELECT + COUNT(*) AS n1, + SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, + SUM(IIF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, + SUM(IIF(c_phone LIKE '11%', 1, 0)) AS n4, + SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS n5, + SUM( + IIF( + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%', + 1, + 0 + ) + ) AS n6 +FROM tpch.customer diff --git a/tests/test_sql_refsols/redundant_has_ansi.sql b/tests/test_sql_refsols/redundant_has_ansi.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_mysql.sql b/tests/test_sql_refsols/redundant_has_mysql.sql new file mode 100644 index 000000000..9dac38536 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +JOIN tpch.NATION AS NATION + ON CUSTOMER.c_nationkey = NATION.n_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_postgres.sql b/tests/test_sql_refsols/redundant_has_postgres.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_postgres.sql @@ -0,0 +1,7 @@ 
+SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_snowflake.sql b/tests/test_sql_refsols/redundant_has_snowflake.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_sqlite.sql b/tests/test_sql_refsols/redundant_has_sqlite.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' From 7c0ab19625f248ac097785fbc9abc75c44030996 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Tue, 27 Jan 2026 12:51:32 -0800 Subject: [PATCH 05/22] switch redundant has and add tests --- pydough/conversion/relational_converter.py | 9 +- tests/test_pipeline_tpch_custom.py | 117 +++++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/pydough/conversion/relational_converter.py b/pydough/conversion/relational_converter.py index c67d4117d..a6f042e84 100644 --- a/pydough/conversion/relational_converter.py +++ b/pydough/conversion/relational_converter.py @@ -731,10 +731,17 @@ def handle_children( child_output = self.apply_aggregations( child, child_output, child.subtree.agg_keys ) + # Optimize SEMI to INNER for singular subtrees + join_type = child.connection_type.join_type + 
if ( + child.connection_type == ConnectionType.SEMI + and child.subtree.is_singular() + ): + join_type = JoinType.INNER context = self.join_outputs( context, child_output, - child.connection_type.join_type, + join_type, cardinality, child.reverse_cardinality, join_keys, diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 1f1392077..6caac393e 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2739,6 +2739,123 @@ ), id="double_cross", ), + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [30183], + } + ), + "redundant_has", + ), + id="redundant_has", + ), + # Nested HAS on singular chain (supplier -> nation -> region), both should optimize to INNER + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(HAS(region.WHERE(name == 'AFRICA')))))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [1955], + } + ), + "redundant_has_nested", + ), + id="redundant_has_nested", + ), + # HAS on plural relationship (orders) - should NOT optimize, stays SEMI + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(orders.WHERE(total_price > 400000)))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [3533], + } + ), + "redundant_has_on_plural", + ), + id="redundant_has_on_plural", + ), + # HAS on singular relationship with additional filter + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(region.name == 'EUROPE')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [1987], + } + ), + "redundant_has_singular_chain", + ), + id="redundant_has_singular_chain", + ), + # HAS on plural relationship (lineitems) - should NOT optimize, stays SEMI + pytest.param( + PyDoughPandasTest( + "result = 
TPCH.CALCULATE(n=COUNT(orders.WHERE(HAS(lines.WHERE(quantity > 49)))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [115066], + } + ), + "redundant_has_on_plural_lineitems", + ), + id="redundant_has_on_plural_lineitems", + ), + # HASNOT on singular relationship - should optimize to ANTI join or similar + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HASNOT(nation.WHERE(region.name == 'AFRICA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [8045], + } + ), + "redundant_has_not_on_singular", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_not_on_singular", + ), + # HAS without WHERE filter on singular - should optimize to INNER + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [150000], + } + ), + "redundant_has_no_filter_singular", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_no_filter_singular", + ), + # HAS on singular within plural context - orders whose customer is from ASIA + pytest.param( + PyDoughPandasTest( + "result = TPCH.CALCULATE(n=COUNT(orders.WHERE(HAS(customer.WHERE(nation.region.name == 'ASIA')))))", + "TPCH", + lambda: pd.DataFrame( + { + "n": [301740], + } + ), + "redundant_has_singular_in_plural_context", + skip_relational=True, + skip_sql=True, + ), + id="redundant_has_singular_in_plural_context", + ), pytest.param( PyDoughPandasTest( bad_child_reuse_1, From f074118f11840975ea90bb171a97032d77da51a9 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Tue, 27 Jan 2026 12:52:22 -0800 Subject: [PATCH 06/22] add test files --- tests/test_plan_refsols/redundant_has.txt | 8 ++++++++ tests/test_plan_refsols/redundant_has_nested.txt | 8 ++++++++ .../redundant_has_on_plural.txt | 6 ++++++ .../redundant_has_on_plural_lineitems.txt | 6 ++++++ .../redundant_has_singular_chain.txt | 8 ++++++++ tests/test_sql_refsols/redundant_has_ansi.sql | 7 +++++++ 
tests/test_sql_refsols/redundant_has_mysql.sql | 7 +++++++ .../redundant_has_nested_ansi.sql | 7 +++++++ .../redundant_has_nested_mysql.sql | 7 +++++++ .../redundant_has_nested_postgres.sql | 7 +++++++ .../redundant_has_nested_snowflake.sql | 7 +++++++ .../redundant_has_nested_sqlite.sql | 7 +++++++ .../redundant_has_on_plural_ansi.sql | 5 +++++ .../redundant_has_on_plural_lineitems_ansi.sql | 5 +++++ .../redundant_has_on_plural_lineitems_mysql.sql | 16 ++++++++++++++++ ...edundant_has_on_plural_lineitems_postgres.sql | 16 ++++++++++++++++ ...dundant_has_on_plural_lineitems_snowflake.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_lineitems_sqlite.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_mysql.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_postgres.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_snowflake.sql | 16 ++++++++++++++++ .../redundant_has_on_plural_sqlite.sql | 16 ++++++++++++++++ .../test_sql_refsols/redundant_has_postgres.sql | 7 +++++++ .../redundant_has_singular_chain_ansi.sql | 7 +++++++ .../redundant_has_singular_chain_mysql.sql | 7 +++++++ .../redundant_has_singular_chain_postgres.sql | 7 +++++++ .../redundant_has_singular_chain_snowflake.sql | 7 +++++++ .../redundant_has_singular_chain_sqlite.sql | 7 +++++++ .../test_sql_refsols/redundant_has_snowflake.sql | 7 +++++++ tests/test_sql_refsols/redundant_has_sqlite.sql | 7 +++++++ 30 files changed, 279 insertions(+) create mode 100644 tests/test_plan_refsols/redundant_has.txt create mode 100644 tests/test_plan_refsols/redundant_has_nested.txt create mode 100644 tests/test_plan_refsols/redundant_has_on_plural.txt create mode 100644 tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt create mode 100644 tests/test_plan_refsols/redundant_has_singular_chain.txt create mode 100644 tests/test_sql_refsols/redundant_has_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_ansi.sql 
create mode 100644 tests/test_sql_refsols/redundant_has_nested_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_nested_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_sqlite.sql diff --git a/tests/test_plan_refsols/redundant_has.txt b/tests/test_plan_refsols/redundant_has.txt new file mode 100644 index 000000000..dafed22e4 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], 
orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/redundant_has_nested.txt b/tests/test_plan_refsols/redundant_has_nested.txt new file mode 100644 index 000000000..19123d2f9 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_nested.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/redundant_has_on_plural.txt b/tests/test_plan_refsols/redundant_has_on_plural.txt new file mode 100644 index 000000000..b19af0652 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_on_plural.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=SEMI, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=o_totalprice > 400000:numeric, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt b/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt new file mode 100644 index 000000000..8c74d53a4 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_on_plural_lineitems.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=SEMI, columns={}) + SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey}) + FILTER(condition=l_quantity > 49:numeric, columns={'l_orderkey': l_orderkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_quantity': l_quantity}) diff --git a/tests/test_plan_refsols/redundant_has_singular_chain.txt b/tests/test_plan_refsols/redundant_has_singular_chain.txt new file mode 100644 index 000000000..c8679898e --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_singular_chain.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'EUROPE':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': 
r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_sql_refsols/redundant_has_ansi.sql b/tests/test_sql_refsols/redundant_has_ansi.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_mysql.sql b/tests/test_sql_refsols/redundant_has_mysql.sql new file mode 100644 index 000000000..9dac38536 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +JOIN tpch.NATION AS NATION + ON CUSTOMER.c_nationkey = NATION.n_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_nested_ansi.sql b/tests/test_sql_refsols/redundant_has_nested_ansi.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_mysql.sql b/tests/test_sql_refsols/redundant_has_nested_mysql.sql new file mode 100644 index 000000000..43c6d8d2e --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.SUPPLIER AS SUPPLIER +JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = SUPPLIER.s_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AFRICA' diff --git 
a/tests/test_sql_refsols/redundant_has_nested_postgres.sql b/tests/test_sql_refsols/redundant_has_nested_postgres.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_snowflake.sql b/tests/test_sql_refsols/redundant_has_nested_snowflake.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_nested_sqlite.sql b/tests/test_sql_refsols/redundant_has_nested_sqlite.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_nested_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql b/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql new file mode 100644 index 000000000..4b4b766b8 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_ansi.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey AND orders.o_totalprice > 400000 diff --git 
a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql new file mode 100644 index 000000000..bbc3107da --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_ansi.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey AND lineitem.l_quantity > 49 diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql new file mode 100644 index 000000000..d8cbe6be9 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_mysql.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.LINEITEM + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.ORDERS AS ORDERS +LEFT JOIN _u_0 AS _u_0 + ON ORDERS.o_orderkey = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_postgres.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + 
COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql new file mode 100644 index 000000000..80a8e58a1 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_lineitems_sqlite.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_quantity > 49 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql b/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql new file mode 100644 index 000000000..05d325c6b --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_mysql.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.ORDERS + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +LEFT JOIN _u_0 AS _u_0 + ON CUSTOMER.c_custkey = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql b/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_postgres.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql b/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ 
b/tests/test_sql_refsols/redundant_has_on_plural_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql b/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql new file mode 100644 index 000000000..921bad30a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_on_plural_sqlite.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + o_custkey AS _u_1 + FROM tpch.orders + WHERE + o_totalprice > 400000 + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_custkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_postgres.sql b/tests/test_sql_refsols/redundant_has_postgres.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql b/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql 
b/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql new file mode 100644 index 000000000..53a22063b --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_mysql.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.SUPPLIER AS SUPPLIER +JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = SUPPLIER.s_nationkey +JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql b/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_postgres.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql b/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql b/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql new file mode 100644 index 000000000..bc66ae6c2 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_singular_chain_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND 
region.r_name = 'EUROPE' diff --git a/tests/test_sql_refsols/redundant_has_snowflake.sql b/tests/test_sql_refsols/redundant_has_snowflake.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' diff --git a/tests/test_sql_refsols/redundant_has_sqlite.sql b/tests/test_sql_refsols/redundant_has_sqlite.sql new file mode 100644 index 000000000..5a8dfef8a --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' From 7d72a7d185cb32b5119a49ff924afd2cb170f8ed Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 27 Jan 2026 13:00:55 -0800 Subject: [PATCH 07/22] Added more testing layers and window bugfixes --- pydough/conversion/hybrid_filter_merger.py | 4 +- pydough/conversion/hybrid_translator.py | 4 +- tests/test_pipeline_tpch_custom.py | 63 ++++++++++++ .../count_multiple_filters_d.txt | 36 +++++++ .../count_multiple_filters_e.txt | 10 ++ .../count_multiple_filters_d_ansi.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_mysql.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_postgres.sql | 96 +++++++++++++++++++ .../count_multiple_filters_d_snowflake.sql | 95 ++++++++++++++++++ .../count_multiple_filters_d_sqlite.sql | 96 +++++++++++++++++++ .../count_multiple_filters_e_ansi.sql | 36 +++++++ .../count_multiple_filters_e_mysql.sql | 36 +++++++ .../count_multiple_filters_e_postgres.sql | 36 +++++++ .../count_multiple_filters_e_snowflake.sql | 36 +++++++ .../count_multiple_filters_e_sqlite.sql 
| 36 +++++++ 15 files changed, 773 insertions(+), 3 deletions(-) create mode 100644 tests/test_plan_refsols/count_multiple_filters_d.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_e.txt create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_ansi.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_mysql.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_postgres.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql create mode 100644 tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 5b424258e..1ed9083f0 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -144,9 +144,9 @@ def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: result: set[HybridExpr] = set() for operation in reversed(tree.pipeline): if isinstance(operation, HybridFilter): - result.update(operation.condition.get_conjunction()) if operation.condition.contains_window_functions(): break + result.update(operation.condition.get_conjunction()) elif isinstance(operation, HybridLimit): break elif isinstance(operation, HybridCalculate): @@ -180,9 +180,9 @@ def get_filter_stripped_form(self, tree: HybridTree) -> str: stripped_tree = copy.deepcopy(tree) for idx, operation in reversed(list(enumerate(stripped_tree.pipeline))): if isinstance(operation, HybridFilter): - stripped_tree.pipeline.pop(idx) if 
operation.condition.contains_window_functions(): break + stripped_tree.pipeline.pop(idx) elif isinstance(operation, HybridLimit): break elif isinstance(operation, HybridCalculate): diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index 2d5f60175..bf3617aad 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1739,8 +1739,10 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. self.run_hybrid_decorrelation(hybrid) - # 5. Run the filter-merging procedure. + # 6. Run the filter-merging procedure, then re-run ejecting aggregate + # inputs to clean up any new aggregates created by filter merging. self.run_filter_merging(hybrid) + self.eject_aggregate_inputs(hybrid) # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation.
self.run_rewrites(hybrid) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index e62bdbd1f..d67037a54 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2770,6 +2770,69 @@ ), id="count_multiple_filters_c", ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c2 = customers.WHERE(nation.name == 'GERMANY').WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c3 = customers.WHERE(nation.name == 'GERMANY')\n" + "c4 = customers.WHERE(nation.name == 'CHINA').WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" + "c5 = customers.WHERE((PERCENTILE(by=account_balance.ASC()) == 100) & (nation.name == 'CHINA'))\n" + "c6 = customers.WHERE(nation.name == 'CHINA')\n" + "c6 = customers.WHERE(nation.name == 'CHINA')\n" + "result = TPCH.CALCULATE(" + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + " n4=COUNT(c4), " + " n5=COUNT(c5), " + " n6=COUNT(c6), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1500], + "n2": [59], + "n3": [5908], + "n4": [60], + "n5": [57], + "n6": [6024], + } + ), + "count_multiple_filters_d", + ), + id="count_multiple_filters_d", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders), " + " n3=COUNT(nations.customers.orders.WHERE(order_priority == '1-URGENT')), " + " n4=COUNT(nations.customers.orders.WHERE(order_priority == '2-HIGH')), " + " n5=COUNT(nations.customers.orders.WHERE(order_priority == '3-MEDIUM')), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [298994, 299103, 301740, 303286, 296877], + "n3": [59767, 59902, 60166, 60373, 60135], + "n4": [59511, 60232, 60246, 60901, 59201], + "n5": [59597, 59230, 60485, 60375, 59036], + } + ), + 
"count_multiple_filters_e", + ), + id="count_multiple_filters_e", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/count_multiple_filters_d.txt b/tests/test_plan_refsols/count_multiple_filters_d.txt new file mode 100644 index 000000000..b80d0ef1c --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_d.txt @@ -0,0 +1,36 @@ +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_agg_1': t1.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt new file mode 100644 index 000000000..548c6def4 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_21, 0:numeric)), ('n5', DEFAULT_TO(sum_sum_expr_22, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_21': t1.sum_sum_expr_21, 'sum_sum_expr_22': t1.sum_sum_expr_22}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_21': SUM(sum_expr_21), 'sum_sum_expr_22': SUM(sum_expr_22)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_21': t1.sum_expr_21, 'sum_expr_22': t1.sum_expr_22}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': 
n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(o_orderpriority == '1-URGENT':string, 1:numeric, 0:numeric)), 'sum_expr_21': SUM(IFF(o_orderpriority == '2-HIGH':string, 1:numeric, 0:numeric)), 'sum_expr_22': SUM(IFF(o_orderpriority == '3-MEDIUM':string, 1:numeric, 0:numeric))}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql new file mode 100644 index 000000000..479f86891 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql @@ -0,0 +1,96 @@ +WITH _t1 AS ( + SELECT + 1 AS "_" + FROM tpch.customer + QUALIFY + NTILE(100) OVER (ORDER BY c_acctbal NULLS LAST) = 100 +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t1 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t4 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s0.c_acctbal NULLS LAST) = 100 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t4 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t10 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s8.c_acctbal NULLS LAST) = 100 +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t10 +), _t14 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = 
nation.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s12.c_acctbal NULLS LAST) = 100 + AND nation.n_name = 'CHINA' +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t14 +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql new file mode 100644 index 000000000..faae8bbfb --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN c_acctbal IS NULL THEN 1 ELSE 0 END, c_acctbal) AS _w + FROM tpch.CUSTOMER +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.CUSTOMER +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.NATION + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN _s0.c_acctbal IS NULL THEN 1 ELSE 0 END, _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.CUSTOMER +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.NATION + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY CASE WHEN _s8.c_acctbal IS NULL THEN 1 ELSE 0 END, _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS 
n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + NATION.n_name, + NTILE(100) OVER (ORDER BY CASE WHEN _s12.c_acctbal IS NULL THEN 1 ELSE 0 END, _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.NATION AS NATION + ON NATION.n_nationkey = _s12.c_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql new file mode 100644 index 000000000..869decb54 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY c_acctbal) AS _w + FROM tpch.customer +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = 
_t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + nation.n_name, + NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql new file mode 100644 index 000000000..4dbe53347 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql @@ -0,0 +1,95 @@ +WITH _t1 AS ( + SELECT + 1 AS "_" + FROM tpch.customer + QUALIFY + NTILE(100) OVER (ORDER BY c_acctbal) = 100 +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t1 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t4 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s0.c_acctbal) = 100 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t4 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t10 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = _t12.n_nationkey + QUALIFY + 
NTILE(100) OVER (ORDER BY _s8.c_acctbal) = 100 +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t10 +), _t14 AS ( + SELECT + 1 AS "_" + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey + QUALIFY + NTILE(100) OVER (ORDER BY _s12.c_acctbal) = 100 AND nation.n_name = 'CHINA' +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t14 +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql new file mode 100644 index 000000000..869decb54 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql @@ -0,0 +1,96 @@ +WITH _t AS ( + SELECT + NTILE(100) OVER (ORDER BY c_acctbal) AS _w + FROM tpch.customer +), _s2 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t + WHERE + _w = 100 +), _s0 AS ( + SELECT + c_acctbal, + c_nationkey + FROM tpch.customer +), _t6 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _t_2 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w + FROM _s0 AS _s0 + JOIN _t6 AS _t6 + ON _s0.c_nationkey = _t6.n_nationkey +), _s3 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_2 + WHERE + _w = 100 +), _s4 AS ( + SELECT + c_nationkey + FROM tpch.customer +), _s7 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s4 + JOIN _t6 AS _t8 + ON _s4.c_nationkey = _t8.n_nationkey +), _t12 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'CHINA' +), _t_3 AS ( + SELECT + NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w + FROM _s0 AS _s8 + JOIN _t12 AS _t12 + ON _s8.c_nationkey = 
_t12.n_nationkey +), _s11 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_3 + WHERE + _w = 100 +), _t_4 AS ( + SELECT + nation.n_name, + NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w + FROM _s0 AS _s12 + JOIN tpch.nation AS nation + ON _s12.c_nationkey = nation.n_nationkey +), _s15 AS ( + SELECT + COUNT(*) AS n_rows + FROM _t_4 + WHERE + _w = 100 AND n_name = 'CHINA' +), _s19 AS ( + SELECT + COUNT(*) AS n_rows + FROM _s4 AS _s16 + JOIN _t12 AS _t17 + ON _s16.c_nationkey = _t17.n_nationkey +) +SELECT + _s2.n_rows AS n1, + _s3.n_rows AS n2, + _s7.n_rows AS n3, + _s11.n_rows AS n4, + _s15.n_rows AS n5, + _s19.n_rows AS n6 +FROM _s2 AS _s2 +CROSS JOIN _s3 AS _s3 +CROSS JOIN _s7 AS _s7 +CROSS JOIN _s11 AS _s11 +CROSS JOIN _s15 AS _s15 +CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql new file mode 100644 index 000000000..676f8e1d1 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM 
tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql new file mode 100644 index 000000000..440100388 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.ORDERS + GROUP BY + 1 +), _s5 AS ( + SELECT + NATION.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.NATION AS NATION + JOIN tpch.CUSTOMER AS CUSTOMER + ON CUSTOMER.c_nationkey = NATION.n_nationkey + LEFT JOIN _s3 AS _s3 + ON CUSTOMER.c_custkey = _s3.o_custkey + GROUP BY + 1 +) +SELECT + REGION.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.REGION AS REGION +JOIN _s5 AS _s5 + ON REGION.r_regionkey = _s5.n_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql new file mode 100644 index 000000000..676f8e1d1 --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, + SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + FROM tpch.orders + 
GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql new file mode 100644 index 000000000..1a13d2d5b --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(IFF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, + SUM(IFF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, + SUM(IFF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = 
region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql new file mode 100644 index 000000000..13f47047c --- /dev/null +++ b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql @@ -0,0 +1,36 @@ +WITH _s3 AS ( + SELECT + o_custkey, + COUNT(*) AS n_rows, + SUM(IIF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, + SUM(IIF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, + SUM(IIF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + FROM tpch.orders + GROUP BY + 1 +), _s5 AS ( + SELECT + nation.n_regionkey, + COUNT(*) AS n_rows, + SUM(_s3.n_rows) AS sum_n_rows, + SUM(_s3.sum_expr) AS sum_sum_expr, + SUM(_s3.sum_expr_21) AS sum_sum_expr_21, + SUM(_s3.sum_expr_22) AS sum_sum_expr_22 + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey + GROUP BY + 1 +) +SELECT + region.r_name AS region_name, + _s5.n_rows AS n1, + COALESCE(_s5.sum_n_rows, 0) AS n2, + COALESCE(_s5.sum_sum_expr, 0) AS n3, + COALESCE(_s5.sum_sum_expr_21, 0) AS n4, + COALESCE(_s5.sum_sum_expr_22, 0) AS n5 +FROM tpch.region AS region +JOIN _s5 AS _s5 + ON _s5.n_regionkey = region.r_regionkey From 6c06d74d6a07f9738345048e0ab407bae8e0788e Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Wed, 28 Jan 2026 10:26:20 -0800 Subject: [PATCH 08/22] [run CI][run SF][run mysql][run postgres] BIRD menu_5556 and update other tests --- pydough/unqualified/qualification.py | 3 +- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 24 ++++++++++++++++ tests/test_plan_refsols/common_prefix_ab.txt | 4 +-- tests/test_plan_refsols/common_prefix_z.txt | 2 +- tests/test_plan_refsols/menu_5556.txt | 12 ++++++++ .../defog_broker_adv8_mysql.sql | 17 +++-------- .../defog_broker_adv8_postgres.sql | 17 +++-------- .../defog_broker_adv8_snowflake.sql | 17 +++-------- .../defog_broker_adv8_sqlite.sql | 17 
+++-------- tests/test_sql_refsols/menu_5556_ansi.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_mysql.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_postgres.sql | 28 +++++++++++++++++++ .../test_sql_refsols/menu_5556_snowflake.sql | 28 +++++++++++++++++++ tests/test_sql_refsols/menu_5556_sqlite.sql | 28 +++++++++++++++++++ 15 files changed, 198 insertions(+), 57 deletions(-) create mode 100644 tests/test_plan_refsols/menu_5556.txt create mode 100644 tests/test_sql_refsols/menu_5556_ansi.sql create mode 100644 tests/test_sql_refsols/menu_5556_mysql.sql create mode 100644 tests/test_sql_refsols/menu_5556_postgres.sql create mode 100644 tests/test_sql_refsols/menu_5556_snowflake.sql create mode 100644 tests/test_sql_refsols/menu_5556_sqlite.sql diff --git a/pydough/unqualified/qualification.py b/pydough/unqualified/qualification.py index c1566b8a6..e6e643cb5 100644 --- a/pydough/unqualified/qualification.py +++ b/pydough/unqualified/qualification.py @@ -595,7 +595,8 @@ def qualify_access( ) if ( - isinstance(qualified_parent, GlobalContext) + isinstance(unqualified_parent, UnqualifiedRoot) + and isinstance(qualified_parent, GlobalContext) and name == qualified_parent.graph.name and not is_child ) or ( diff --git a/tests/conftest.py b/tests/conftest.py index 3bf635206..8334590e0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -682,7 +682,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu"] """ Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. 
diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 5f88bd717..9570029f2 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -113,6 +113,30 @@ ), id="wdi_albania_footnotes_1978", ), + pytest.param( + PyDoughPandasTest( + """ +result = menu.menu.WHERE( + HAS(menupages.menuitems.dish.WHERE(LOWER(name) == "baked apples with cream")) + ).CALCULATE( + sponsor_name=sponsor, + max_item_price=MAX(menupages.menuitems.price) + ).TOP_K( + 1, by=max_item_price.DESC() + ).CALCULATE( + sponsor=sponsor_name + ) + """, + "menu", + lambda: pd.DataFrame( + { + "sponsor": ["MURRAY HILL HOTEL"], + } + ), + "menu_5556", + ), + id="menu_5556", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/common_prefix_ab.txt b/tests/test_plan_refsols/common_prefix_ab.txt index 08996397b..93550a02f 100644 --- a/tests/test_plan_refsols/common_prefix_ab.txt +++ b/tests/test_plan_refsols/common_prefix_ab.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_custkey == t1.c_custkey, type=SEMI, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey}) FILTER(condition=c_acctbal > 0.0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) diff --git 
a/tests/test_plan_refsols/common_prefix_z.txt b/tests/test_plan_refsols/common_prefix_z.txt index 05678bd68..946eb0b41 100644 --- a/tests/test_plan_refsols/common_prefix_z.txt +++ b/tests/test_plan_refsols/common_prefix_z.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', c_name), ('nation_name', n_name)], orderings=[(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t0.c_name, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_name': c_name, 'c_nationkey': c_nationkey}) - JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=SEMI, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/menu_5556.txt b/tests/test_plan_refsols/menu_5556.txt new file mode 100644 index 000000000..1b56e06ba --- /dev/null +++ b/tests/test_plan_refsols/menu_5556.txt @@ -0,0 +1,12 @@ +ROOT(columns=[('sponsor', sponsor)], orderings=[(max_price):desc_last], limit=1:numeric) + JOIN(condition=t0.id == t1.menu_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_price': t1.max_price, 'sponsor': t0.sponsor}) + SCAN(table=main.Menu, columns={'id': id, 'sponsor': sponsor}) + FILTER(condition=sum_n_rows != 0:numeric, columns={'max_price': max_price, 'menu_id': menu_id}) + AGGREGATE(keys={'menu_id': menu_id}, aggregations={'max_price': MAX(price), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.dish_id == t1.id, type=LEFT, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'menu_id': t0.menu_id, 'n_rows': t1.n_rows, 'price': t0.price}) + JOIN(condition=t0.id == t1.menu_page_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'dish_id': t1.dish_id, 'menu_id': t0.menu_id, 'price': t1.price}) + SCAN(table=main.MenuPage, columns={'id': id, 'menu_id': menu_id}) + SCAN(table=main.MenuItem, columns={'dish_id': dish_id, 'menu_page_id': menu_page_id, 'price': price}) + PROJECT(columns={'id': id, 'n_rows': 1:numeric}) + FILTER(condition=LOWER(name) == 'baked apples with cream':string, columns={'id': id}) + SCAN(table=main.Dish, columns={'id': id, 'name': name}) diff --git a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql index 17fb88b4a..743e4da74 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_mysql.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_mysql.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbCustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbTransaction.sbtxamount), 0) AS total_amount FROM main.sbTransaction AS sbTransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbTransaction.sbtxcustid +JOIN main.sbCustomer AS sbCustomer + ON LOWER(sbCustomer.sbcustcountry) = 'usa' + AND sbCustomer.sbcustid = sbTransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbTransaction.sbtxdatetime < CAST(DATE_SUB( + sbTransaction.sbtxdatetime < CAST(DATE_SUB( CURRENT_TIMESTAMP(), INTERVAL ( ( diff --git a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql index 26080ce78..f8c0db2f0 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_postgres.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_postgres.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - 
LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', CURRENT_TIMESTAMP - CAST(( EXTRACT(DOW FROM CURRENT_TIMESTAMP) + 6 diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql index 0a72bb5a5..eb1b10906 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE_TRUNC( + sbtransaction.sbtxdatetime < DATE_TRUNC( 'DAY', DATEADD( DAY, diff --git a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql index b94aec102..f29c3de35 100644 --- a/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql +++ b/tests/test_sql_refsols/defog_broker_adv8_sqlite.sql @@ -1,21 +1,12 @@ -WITH _u_0 AS ( - SELECT - sbcustid AS _u_1 - FROM main.sbcustomer - WHERE - LOWER(sbcustcountry) = 'usa' - GROUP BY - 1 -) SELECT NULLIF(COUNT(*), 0) AS n_transactions, COALESCE(SUM(sbtransaction.sbtxamount), 0) AS 
total_amount FROM main.sbtransaction AS sbtransaction -LEFT JOIN _u_0 AS _u_0 - ON _u_0._u_1 = sbtransaction.sbtxcustid +JOIN main.sbcustomer AS sbcustomer + ON LOWER(sbcustomer.sbcustcountry) = 'usa' + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid WHERE - NOT _u_0._u_1 IS NULL - AND sbtransaction.sbtxdatetime < DATE( + sbtransaction.sbtxdatetime < DATE( 'now', '-' || CAST(( CAST(STRFTIME('%w', DATETIME('now')) AS INTEGER) + 6 diff --git a/tests/test_sql_refsols/menu_5556_ansi.sql b/tests/test_sql_refsols/menu_5556_ansi.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_ansi.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_mysql.sql b/tests/test_sql_refsols/menu_5556_mysql.sql new file mode 100644 index 000000000..0e0ec128a --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_mysql.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.Dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + MenuPage.menu_id, + MAX(MenuItem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.MenuPage AS MenuPage + JOIN main.MenuItem AS MenuItem + ON MenuItem.menu_page_id = MenuPage.id + LEFT JOIN _s3 AS _s3 + ON MenuItem.dish_id = _s3.id + GROUP BY + 1 +) +SELECT + Menu.sponsor +FROM main.Menu AS Menu +JOIN _t1 AS _t1 + ON Menu.id = _t1.menu_id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 
1 diff --git a/tests/test_sql_refsols/menu_5556_postgres.sql b/tests/test_sql_refsols/menu_5556_postgres.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_postgres.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_snowflake.sql b/tests/test_sql_refsols/menu_5556_snowflake.sql new file mode 100644 index 000000000..f93c707b5 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_snowflake.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/menu_5556_sqlite.sql b/tests/test_sql_refsols/menu_5556_sqlite.sql new file mode 100644 index 000000000..18f7336f2 --- /dev/null +++ b/tests/test_sql_refsols/menu_5556_sqlite.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + 1 AS n_rows, + id + FROM main.dish + WHERE + LOWER(name) = 'baked apples with cream' +), _t1 AS ( + SELECT + menupage.menu_id, + 
MAX(menuitem.price) AS max_price, + SUM(_s3.n_rows) AS sum_n_rows + FROM main.menupage AS menupage + JOIN main.menuitem AS menuitem + ON menuitem.menu_page_id = menupage.id + LEFT JOIN _s3 AS _s3 + ON _s3.id = menuitem.dish_id + GROUP BY + 1 +) +SELECT + menu.sponsor +FROM main.menu AS menu +JOIN _t1 AS _t1 + ON _t1.menu_id = menu.id AND _t1.sum_n_rows <> 0 +ORDER BY + _t1.max_price DESC +LIMIT 1 From 307c60138bf839c8d6660bb81cd59ea59a85352a Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:05:59 -0800 Subject: [PATCH 09/22] Resolving bugs and updating tests [RUN CI] --- pydough/conversion/hybrid_filter_merger.py | 2 ++ pydough/conversion/hybrid_translator.py | 3 -- pydough/conversion/hybrid_tree.py | 4 +++ .../conversion/relational_simplification.py | 26 +++++++++++++-- .../database_connectors/database_connector.py | 1 - tests/test_metadata/masked_graphs.json | 2 +- tests/test_plan_refsols/aggregate_semi.txt | 2 +- tests/test_plan_refsols/common_prefix_n.txt | 14 ++++---- tests/test_plan_refsols/common_prefix_o.txt | 16 ++++----- tests/test_plan_refsols/correl_14.txt | 2 +- tests/test_plan_refsols/correl_15.txt | 2 +- tests/test_plan_refsols/correl_35.txt | 23 +++++++------ .../count_multiple_filters_a.txt | 4 +-- .../count_multiple_filters_b.txt | 8 ++--- .../count_multiple_filters_c.txt | 4 +-- .../count_multiple_filters_e.txt | 2 +- .../cryptbank_general_join_01_raw.txt | 2 +- .../cryptbank_general_join_01_rewrite.txt | 2 +- .../cryptbank_general_join_02_raw.txt | 2 +- .../cryptbank_general_join_02_rewrite.txt | 2 +- tests/test_plan_refsols/semi_aggregate.txt | 2 +- .../defog_test_functions.py | 6 ++-- tests/test_sql_refsols/correl_14_sqlite.sql | 2 +- tests/test_sql_refsols/correl_15_sqlite.sql | 2 +- tests/test_sql_refsols/correl_31_sqlite.sql | 4 +-- tests/test_sql_refsols/correl_35_sqlite.sql | 33 ++++++------------- .../count_multiple_filters_a_ansi.sql | 2 +- .../count_multiple_filters_a_mysql.sql | 2 +- 
.../count_multiple_filters_a_snowflake.sql | 2 +- .../count_multiple_filters_a_sqlite.sql | 2 +- .../count_multiple_filters_b_ansi.sql | 20 +++++------ .../count_multiple_filters_b_mysql.sql | 20 +++++------ .../count_multiple_filters_b_postgres.sql | 20 +++++------ .../count_multiple_filters_b_snowflake.sql | 20 +++++------ .../count_multiple_filters_b_sqlite.sql | 20 +++++------ .../count_multiple_filters_c_ansi.sql | 20 +++++------ .../count_multiple_filters_c_mysql.sql | 20 +++++------ .../count_multiple_filters_c_snowflake.sql | 22 +++++-------- .../count_multiple_filters_c_sqlite.sql | 20 +++++------ .../count_multiple_filters_e_ansi.sql | 6 ++-- .../count_multiple_filters_e_mysql.sql | 6 ++-- .../count_multiple_filters_e_snowflake.sql | 6 ++-- .../count_multiple_filters_e_sqlite.sql | 6 ++-- .../defog_restaurants_gen11_ansi.sql | 2 +- .../defog_restaurants_gen11_mysql.sql | 2 +- .../defog_restaurants_gen11_snowflake.sql | 2 +- .../defog_restaurants_gen11_sqlite.sql | 2 +- .../defog_restaurants_gen14_ansi.sql | 8 ++++- .../defog_restaurants_gen14_mysql.sql | 8 ++++- .../defog_restaurants_gen14_postgres.sql | 8 ++++- .../defog_restaurants_gen14_snowflake.sql | 8 ++++- .../defog_restaurants_gen14_sqlite.sql | 8 ++++- .../defog_restaurants_gen15_ansi.sql | 2 +- .../defog_restaurants_gen15_mysql.sql | 2 +- .../defog_restaurants_gen15_postgres.sql | 2 +- .../defog_restaurants_gen15_sqlite.sql | 2 +- .../defog_restaurants_gen8_ansi.sql | 2 +- .../defog_restaurants_gen8_mysql.sql | 2 +- .../defog_restaurants_gen8_snowflake.sql | 2 +- .../defog_restaurants_gen8_sqlite.sql | 2 +- 60 files changed, 236 insertions(+), 214 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 1ed9083f0..b6280b219 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -117,6 +117,8 @@ def merge_filters(self, tree: HybridTree) -> None: for operation in tree.pipeline: 
operation.replace_expressions(replacement_map) + tree.remove_dead_children(set()) + # Run the procedure recursively on the parent tree and the child # subtrees. if tree.parent is not None: diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index bf3617aad..21d554f5c 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1746,7 +1746,4 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: # 7. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. self.run_rewrites(hybrid) - # 8. Remove any dead children in the hybrid tree that are no longer - # being used. - hybrid.remove_dead_children(set()) return hybrid diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index aeea1bf38..ab716efb5 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -980,6 +980,10 @@ def remove_dead_children(self, must_remove: set[int]) -> dict[int, int]: ) or self.children[child_idx].connection_type.is_anti: children_to_delete.discard(child_idx) + print() + print(self) + print(children_to_delete) + if len(children_to_delete) == 0: return {i: i for i in range(len(self.children))} diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 45a40cd16..103de3e2d 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -870,8 +870,8 @@ def simplify_function_call( output_predicates.positive = True output_expr = CallExpression(pydop.COUNT, expr.data_type, []) - # All of these operators are non-null or non-negative if their - # first argument is. + # All of these operators are non-null, non-negative, or positive if + # their first argument is. 
case ( pydop.SUM | pydop.AVG @@ -882,8 +882,28 @@ def simplify_function_call( | pydop.QUANTILE ): output_predicates |= arg_predicates[0] & PredicateSet( - not_null=True, not_negative=True + not_null=True, + not_negative=True, + positive=True, ) + if expr.op == pydop.SUM: + if ( + isinstance(expr.inputs[0], CallExpression) + and expr.inputs[0].op == pydop.IFF + ): + # SUM(IFF(cond, 1, 0)) -> SUM(cond) + cond_arg: RelationalExpression = expr.inputs[0].inputs[0] + first_arg: RelationalExpression = expr.inputs[0].inputs[1] + second_arg: RelationalExpression = expr.inputs[0].inputs[2] + if ( + isinstance(first_arg, LiteralExpression) + and first_arg.value in (1, 1.0, True) + and isinstance(second_arg, LiteralExpression) + and second_arg.value in (0, 0.0, False) + ): + output_expr = CallExpression( + pydop.SUM, expr.data_type, [cond_arg] + ) # INTEGER(x) -> x if x is a literal integer. Also simplify for # booleans. diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index 303c7581c..b34189442 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -52,7 +52,6 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: try: self.cursor.execute(sql) except Exception as e: - breakpoint() print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") raise pydough.active_session.error_builder.sql_runtime_failure( sql, e, True diff --git a/tests/test_metadata/masked_graphs.json b/tests/test_metadata/masked_graphs.json index 7ff27c758..8f61d3508 100644 --- a/tests/test_metadata/masked_graphs.json +++ b/tests/test_metadata/masked_graphs.json @@ -384,7 +384,7 @@ "original parent": "branches", "original property": "same_state_customers", "singular": false, - "always matches": true, + "always matches": false, "description": "All branches located in the same state as the customer", "synonyms": ["branches in same state", "local branches"] } diff --git 
a/tests/test_plan_refsols/aggregate_semi.txt b/tests/test_plan_refsols/aggregate_semi.txt index 6c99fbc08..6c4ccc09c 100644 --- a/tests/test_plan_refsols/aggregate_semi.txt +++ b/tests/test_plan_refsols/aggregate_semi.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_plan_refsols/common_prefix_n.txt b/tests/test_plan_refsols/common_prefix_n.txt index 2cf16811e..1295aedfe 100644 --- a/tests/test_plan_refsols/common_prefix_n.txt +++ b/tests/test_plan_refsols/common_prefix_n.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', 
DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_agg, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, (l_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_agg': t1.sum_agg, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice}) +ROOT(columns=[('key', l_orderkey), ('order_date', anything_o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', DEFAULT_TO(sum_sum_sum_expr, 0:numeric))], orderings=[(anything_o_orderdate):desc_last, 
(l_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'anything_o_orderdate': anything_o_orderdate, 'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) + JOIN(condition=t0.l_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_o_orderdate': t0.anything_o_orderdate, 'l_orderkey': t0.l_orderkey, 'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'anything_o_orderdate': ANYTHING(o_orderdate), 'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': SUM(sum_sum_expr)}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_orderkey': t1.l_orderkey, 'o_orderdate': t0.o_orderdate, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t1.sum_n_rows, 'sum_p_retailprice': t1.sum_p_retailprice, 'sum_sum_expr': t1.sum_sum_expr}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': 
l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_o.txt b/tests/test_plan_refsols/common_prefix_o.txt index f9f6f44f3..614e51530 100644 --- a/tests/test_plan_refsols/common_prefix_o.txt +++ b/tests/test_plan_refsols/common_prefix_o.txt @@ -1,13 +1,13 @@ -ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_agg)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) - FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) 
> DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t0.sum_sum_agg, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_agg': t1.sum_sum_agg, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice}) +ROOT(columns=[('key', o_orderkey), ('order_date', o_orderdate), ('n_elements', DEFAULT_TO(sum_sum_n_rows, 0:numeric)), ('total_retail_price', DEFAULT_TO(sum_sum_p_retailprice, 0:numeric)), ('n_unique_supplier_nations', DEFAULT_TO(ndistinct_n_name, 0:numeric)), ('max_supplier_balance', max_s_acctbal), ('n_small_parts', sum_sum_sum_expr)], orderings=[(o_orderdate):desc_last, (o_orderkey):asc_first], limit=5:numeric) + FILTER(condition=DEFAULT_TO(sum_sum_n_rows, 0:numeric) > DEFAULT_TO(ndistinct_n_name, 0:numeric), columns={'max_s_acctbal': max_s_acctbal, 'ndistinct_n_name': ndistinct_n_name, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': sum_sum_sum_expr}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t0.max_s_acctbal, 'ndistinct_n_name': 
t1.ndistinct_n_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t0.sum_sum_n_rows, 'sum_sum_p_retailprice': t0.sum_sum_p_retailprice, 'sum_sum_sum_expr': t0.sum_sum_sum_expr}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'max_s_acctbal': t1.max_s_acctbal, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'sum_sum_n_rows': t1.sum_sum_n_rows, 'sum_sum_p_retailprice': t1.sum_sum_p_retailprice, 'sum_sum_sum_expr': t1.sum_sum_sum_expr}) FILTER(condition=YEAR(o_orderdate) == 1996:numeric & ISIN(MONTH(o_orderdate), [10, 11, 12]:array[numeric]), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=sum_sum_agg != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_agg': sum_sum_agg, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_agg': SUM(sum_agg), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_agg': t0.sum_agg, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_agg': SUM(IFF(STARTSWITH(p_container, 'SM':string), 1:numeric, 0:numeric)), 'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice)}) + FILTER(condition=sum_sum_sum_expr != 0:numeric, columns={'l_orderkey': l_orderkey, 'max_s_acctbal': max_s_acctbal, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_p_retailprice': sum_sum_p_retailprice, 'sum_sum_sum_expr': 
sum_sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey}, aggregations={'max_s_acctbal': MAX(s_acctbal), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_p_retailprice': SUM(sum_p_retailprice), 'sum_sum_sum_expr': SUM(sum_sum_expr)}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 's_acctbal': t1.s_acctbal, 'sum_n_rows': t0.sum_n_rows, 'sum_p_retailprice': t0.sum_p_retailprice, 'sum_sum_expr': t0.sum_sum_expr}) + AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}, aggregations={'sum_n_rows': SUM(n_rows), 'sum_p_retailprice': SUM(p_retailprice), 'sum_sum_expr': SUM(STARTSWITH(p_container, 'SM':string))}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_container': t1.p_container, 'p_retailprice': t1.p_retailprice}) AGGREGATE(keys={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT()}) FILTER(condition=MONTH(l_shipdate) == 11:numeric & YEAR(l_shipdate) == 1996:numeric, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/correl_14.txt b/tests/test_plan_refsols/correl_14.txt index 125f405ef..22b4f80d8 100644 --- a/tests/test_plan_refsols/correl_14.txt +++ b/tests/test_plan_refsols/correl_14.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) JOIN(condition=t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) FILTER(condition=s_acctbal < 1000:numeric & s_nationkey == 19:numeric, columns={'s_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/correl_15.txt b/tests/test_plan_refsols/correl_15.txt index 4b184d5bf..7681d553f 100644 --- a/tests/test_plan_refsols/correl_15.txt +++ b/tests/test_plan_refsols/correl_15.txt @@ -2,7 +2,7 @@ ROOT(columns=[('n', ndistinct_ps_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_ps_suppkey': NDISTINCT(ps_suppkey)}) JOIN(condition=t1.p_retailprice < t0.anything_avg_p_retailprice * 0.85:numeric & t1.p_retailprice < t0.ps_supplycost * 1.5:numeric & t1.p_retailprice < t0.sum_p_retailprice / t0.sum_expr & t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'anything_avg_p_retailprice': t0.anything_avg_p_retailprice, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'ps_supplycost': t1.ps_supplycost, 'sum_expr': t0.sum_expr, 
'sum_p_retailprice': t0.sum_p_retailprice}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'anything_avg_p_retailprice': ANYTHING(avg_p_retailprice), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'avg_p_retailprice': t0.avg_p_retailprice, 'p_retailprice': t1.p_retailprice, 'ps_suppkey': t1.ps_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_p_retailprice': t0.avg_p_retailprice, 's_suppkey': t1.s_suppkey}) AGGREGATE(keys={}, aggregations={'avg_p_retailprice': AVG(p_retailprice)}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index 4aaab2b34..af49adab5 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -10,15 +10,14 @@ ROOT(columns=[('n', n_rows)], orderings=[]) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=sum_n_rows != 0:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.l_partkey == 
t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t0.n_rows, 'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) - AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - FILTER(condition=YEAR(l_shipdate) == 1997:numeric & ISIN(MONTH(l_shipdate), [1, 2, 3]:array[numeric]), columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) - SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) - SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'o_orderpriority': o_orderpriority, 'p_type': p_type}, aggregations={}) + JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 
'o_orderpriority': t0.o_orderpriority, 'p_type': t1.p_type}) + AGGREGATE(keys={'c_custkey': c_custkey, 'c_nationkey': c_nationkey, 'l_partkey': l_partkey, 'o_orderpriority': o_orderpriority}, aggregations={}) + JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'l_partkey': t1.l_partkey, 'o_orderpriority': t0.o_orderpriority}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=YEAR(o_orderdate) == 1997:numeric, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=YEAR(l_shipdate) == 1997:numeric & ISIN(MONTH(l_shipdate), [1, 2, 3]:array[numeric]), columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey}) + SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_partkey': l_partkey, 'l_shipdate': l_shipdate}) + SCAN(table=tpch.PART, columns={'p_partkey': p_partkey, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/count_multiple_filters_a.txt b/tests/test_plan_refsols/count_multiple_filters_a.txt index c4182bc18..58fde44d6 100644 --- a/tests/test_plan_refsols/count_multiple_filters_a.txt +++ b/tests/test_plan_refsols/count_multiple_filters_a.txt @@ -1,4 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr)], 
orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string)}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 281cd5a3c..3ebaf6592 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_6), ('n4', agg_7), ('n5', agg_8), ('n6', agg_9)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'agg_6': t0.agg_6, 'agg_7': t0.agg_7, 'agg_8': t1.agg_8, 'agg_9': t0.agg_9, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'agg_6': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'agg_7': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'agg_9': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 
600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'agg_8': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n_rows': COUNT()}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index 78d1de0ad..fed82fccd 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,3 +1,3 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2), ('n3', n3), ('n4', n4), ('n5', n5), ('n6', n6)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n2': SUM(IFF(c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n3': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric), 1:numeric, 0:numeric)), 'n4': SUM(IFF(STARTSWITH(c_phone, '11':string), 1:numeric, 0:numeric)), 'n5': SUM(IFF(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n6': SUM(IFF(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string, 1:numeric, 0:numeric)), 'n_rows': COUNT()}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 
'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt index 548c6def4..b67bcbb02 100644 --- a/tests/test_plan_refsols/count_multiple_filters_e.txt +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -6,5 +6,5 @@ ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_r JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(o_orderpriority == '1-URGENT':string, 1:numeric, 0:numeric)), 'sum_expr_21': SUM(IFF(o_orderpriority == '2-HIGH':string, 1:numeric, 0:numeric)), 'sum_expr_22': SUM(IFF(o_orderpriority == '3-MEDIUM':string, 1:numeric, 0:numeric))}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_21': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_22': SUM(o_orderpriority == '3-MEDIUM':string)}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 641e01b1c..59649defb 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -6,7 +6,7 @@ 
ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_lo SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 641e01b1c..59649defb 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -6,7 +6,7 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_lo SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - 
([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt index e79570ab4..2300e583e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt @@ -2,6 +2,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': 
t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt index e79570ab4..2300e583e 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt @@ -2,6 +2,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/semi_aggregate.txt 
b/tests/test_plan_refsols/semi_aggregate.txt index 6c99fbc08..6c4ccc09c 100644 --- a/tests/test_plan_refsols/semi_aggregate.txt +++ b/tests/test_plan_refsols/semi_aggregate.txt @@ -1,7 +1,7 @@ ROOT(columns=[('name', s_name), ('num_10parts', n_rows), ('avg_price_of_10parts', sum_p_retailprice / sum_expr), ('sum_price_of_10parts', DEFAULT_TO(sum_p_retailprice, 0:numeric))], orderings=[]) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 's_name': t0.s_name, 'sum_expr': t1.sum_expr, 'sum_p_retailprice': t1.sum_p_retailprice}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_suppkey': s_suppkey}) - AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(IFF(PRESENT(p_retailprice), 1:numeric, 0:numeric)), 'sum_p_retailprice': SUM(p_retailprice)}) + AGGREGATE(keys={'ps_suppkey': ps_suppkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(PRESENT(p_retailprice)), 'sum_p_retailprice': SUM(p_retailprice)}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_retailprice': t1.p_retailprice, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_size == 10:numeric, columns={'p_partkey': p_partkey, 'p_retailprice': p_retailprice}) diff --git a/tests/test_pydough_functions/defog_test_functions.py b/tests/test_pydough_functions/defog_test_functions.py index 8a2770086..600b4700c 100644 --- a/tests/test_pydough_functions/defog_test_functions.py +++ b/tests/test_pydough_functions/defog_test_functions.py @@ -2967,8 +2967,8 @@ def impl_defog_restaurants_gen14(): non-vegan food in San Francisco? 
Match food_type case insensitively """ sf_restaurants = restaurants.WHERE(LOWER(city_name) == "san francisco") - n_vegan = SUM(LOWER(sf_restaurants.food_type) == "vegan") - n_non_vegan = SUM(LOWER(sf_restaurants.food_type) != "vegan") + n_vegan = COUNT(sf_restaurants.WHERE(LOWER(food_type) == "vegan")) + n_non_vegan = COUNT(sf_restaurants) - n_vegan return Restaurants.CALCULATE( ratio=(n_vegan / KEEP_IF(n_non_vegan, n_non_vegan != 0)) ) @@ -2983,7 +2983,7 @@ def impl_defog_restaurants_gen15(): Los Angeles? """ la_restaurants = restaurants.WHERE(LOWER(city_name) == "los angeles") - n_la_italian = SUM(LOWER(la_restaurants.food_type) == "italian") + n_la_italian = COUNT(la_restaurants.WHERE(LOWER(food_type) == "italian")) n_la = COUNT(la_restaurants) return Restaurants.CALCULATE(ratio=(n_la_italian / KEEP_IF(n_la, n_la != 0))) diff --git a/tests/test_sql_refsols/correl_14_sqlite.sql b/tests/test_sql_refsols/correl_14_sqlite.sql index cc4c44913..2340e279e 100644 --- a/tests/test_sql_refsols/correl_14_sqlite.sql +++ b/tests/test_sql_refsols/correl_14_sqlite.sql @@ -1,7 +1,7 @@ WITH _s4 AS ( SELECT partsupp.ps_suppkey, - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(NOT part.p_retailprice IS NULL) AS sum_expr, SUM(part.p_retailprice) AS sum_p_retailprice FROM tpch.supplier AS supplier JOIN tpch.partsupp AS partsupp diff --git a/tests/test_sql_refsols/correl_15_sqlite.sql b/tests/test_sql_refsols/correl_15_sqlite.sql index e93da2ce7..8fbf54a41 100644 --- a/tests/test_sql_refsols/correl_15_sqlite.sql +++ b/tests/test_sql_refsols/correl_15_sqlite.sql @@ -6,7 +6,7 @@ WITH _s0 AS ( SELECT partsupp.ps_suppkey, MAX(_s0.avg_p_retailprice) AS anything_avg_p_retailprice, - SUM(IIF(NOT part.p_retailprice IS NULL, 1, 0)) AS sum_expr, + SUM(NOT part.p_retailprice IS NULL) AS sum_expr, SUM(part.p_retailprice) AS sum_p_retailprice FROM _s0 AS _s0 JOIN tpch.supplier AS supplier diff --git a/tests/test_sql_refsols/correl_31_sqlite.sql 
b/tests/test_sql_refsols/correl_31_sqlite.sql index 8376c2ada..69dbc030b 100644 --- a/tests/test_sql_refsols/correl_31_sqlite.sql +++ b/tests/test_sql_refsols/correl_31_sqlite.sql @@ -22,7 +22,7 @@ WITH _t1 AS ( 1 - lineitem.l_discount ) ELSE NULL - END AS expr_2 + END AS expr_7 FROM tpch.nation AS nation JOIN tpch.region AS region ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' @@ -46,7 +46,7 @@ SELECT AVG(l_extendedprice * ( 1 - l_discount )) AS mean_rev, - AVG(expr_2) AS median_rev + AVG(expr_7) AS median_rev FROM _t1 GROUP BY n_nationkey diff --git a/tests/test_sql_refsols/correl_35_sqlite.sql b/tests/test_sql_refsols/correl_35_sqlite.sql index 16cab7b6a..fb6b735d7 100644 --- a/tests/test_sql_refsols/correl_35_sqlite.sql +++ b/tests/test_sql_refsols/correl_35_sqlite.sql @@ -4,12 +4,11 @@ WITH _s1 AS ( p_type FROM tpch.part ), _s10 AS ( - SELECT + SELECT DISTINCT customer.c_custkey, customer.c_nationkey, lineitem.l_partkey, - orders.o_orderpriority, - COUNT(*) AS n_rows + orders.o_orderpriority FROM tpch.customer AS customer JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1997 @@ -18,26 +17,15 @@ WITH _s1 AS ( ON CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1997 AND CAST(STRFTIME('%m', lineitem.l_shipdate) AS INTEGER) IN (1, 2, 3) AND lineitem.l_orderkey = orders.o_orderkey - GROUP BY - 1, - 2, - 3, - 4 -), _t3 AS ( - SELECT +), _s13 AS ( + SELECT DISTINCT _s10.c_custkey, _s10.c_nationkey, _s10.o_orderpriority, - _s11.p_type, - SUM(_s10.n_rows) AS sum_n_rows + _s11.p_type FROM _s10 AS _s10 JOIN _s1 AS _s11 ON _s10.l_partkey = _s11.p_partkey - GROUP BY - 1, - 2, - 3, - 4 ) SELECT COUNT(*) AS n @@ -49,11 +37,10 @@ JOIN tpch.supplier AS supplier JOIN tpch.orders AS orders ON CAST(STRFTIME('%Y', orders.o_orderdate) AS INTEGER) = 1998 AND lineitem.l_orderkey = orders.o_orderkey -JOIN _t3 AS _t3 - ON _s1.p_type = _t3.p_type - AND _t3.c_custkey = orders.o_custkey - AND _t3.c_nationkey = 
supplier.s_nationkey - AND _t3.o_orderpriority = orders.o_orderpriority - AND _t3.sum_n_rows <> 0 +JOIN _s13 AS _s13 + ON _s1.p_type = _s13.p_type + AND _s13.c_custkey = orders.o_custkey + AND _s13.c_nationkey = supplier.s_nationkey + AND _s13.o_orderpriority = orders.o_orderpriority WHERE CAST(STRFTIME('%Y', lineitem.l_shipdate) AS INTEGER) = 1998 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql index ba0018b62..5a423e7e5 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql index a43c29266..184cf2b26 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.CUSTOMER WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql index 6adc35063..7150be416 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 + COUNT_IF(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql index 
703d56924..5a423e7e5 100644 --- a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql @@ -1,6 +1,6 @@ SELECT COUNT(*) AS n1, - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2 + SUM(c_mktsegment = 'BUILDING') AS n2 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql index 92f7c16eb..4f96f56d6 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql index 287a8d2e6..79cd17fa8 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT 
- SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.CUSTOMER WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.CUSTOMER WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql index 92f7c16eb..8ce688027 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS agg_6, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_7, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS sum_expr, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_11, + SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS agg_8, - 
COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql index bc13323f9..a6a0d4dfd 100644 --- a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_7, - SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + COUNT_IF(c_mktsegment = 'BUILDING') AS sum_expr, + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr_11, + COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql index 7cddc1ce8..4f96f56d6 100644 --- 
a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql @@ -1,16 +1,16 @@ WITH _s0 AS ( SELECT - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS agg_6, - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_7, - SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS agg_9, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_mktsegment = 'BUILDING') AS sum_expr, + SUM(c_phone LIKE '11%') AS sum_expr_11, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 FROM tpch.customer WHERE c_acctbal <= 600 AND c_acctbal >= 500 ), _s1 AS ( SELECT - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS agg_8, - COUNT(*) AS n_rows + COUNT(*) AS n_rows, + SUM(c_phone LIKE '11%') AS sum_expr FROM tpch.customer WHERE c_mktsegment = 'BUILDING' @@ -18,9 +18,9 @@ WITH _s0 AS ( SELECT _s0.n_rows AS n1, _s1.n_rows AS n2, - _s0.agg_6 AS n3, - _s0.agg_7 AS n4, - _s1.agg_8 AS n5, - _s0.agg_9 AS n6 + _s0.sum_expr AS n3, + _s0.sum_expr_11 AS n4, + _s1.sum_expr AS n5, + _s0.sum_expr_12 AS n6 FROM _s0 AS _s0 CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql index 3861720ce..1d410f585 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, - SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - CASE - WHEN c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - 
AND c_phone LIKE '11%' - THEN 1 - ELSE 0 - END + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql index a3e0f4986..2926641ca 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, - SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n4, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - CASE - WHEN c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - THEN 1 - ELSE 0 - END + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.CUSTOMER diff --git a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql index 609166553..dc9d70b1d 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(IFF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, - SUM(IFF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, - SUM(IFF(STARTSWITH(c_phone, '11'), 1, 0)) AS n4, - SUM(IFF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING', 1, 0)) AS n5, - SUM( - IFF( - STARTSWITH(c_phone, '11') - AND c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING', - 1, - 0 - ) + 
COUNT_IF(c_mktsegment = 'BUILDING') AS n2, + COUNT_IF(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + COUNT_IF(STARTSWITH(c_phone, '11')) AS n4, + COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS n5, + COUNT_IF( + STARTSWITH(c_phone, '11') + AND c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql index 8cabf3bfd..1d410f585 100644 --- a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql @@ -1,17 +1,13 @@ SELECT COUNT(*) AS n1, - SUM(IIF(c_mktsegment = 'BUILDING', 1, 0)) AS n2, - SUM(IIF(c_acctbal <= 600 AND c_acctbal >= 500, 1, 0)) AS n3, - SUM(IIF(c_phone LIKE '11%', 1, 0)) AS n4, - SUM(IIF(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%', 1, 0)) AS n5, + SUM(c_mktsegment = 'BUILDING') AS n2, + SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, + SUM(c_phone LIKE '11%') AS n4, + SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, SUM( - IIF( - c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%', - 1, - 0 - ) + c_acctbal <= 600 + AND c_acctbal >= 500 + AND c_mktsegment = 'BUILDING' + AND c_phone LIKE '11%' ) AS n6 FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql index 676f8e1d1..34e9c3fc0 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, - SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + SUM(o_orderpriority = 
'1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql index 440100388..eb63b410b 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, - SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 + SUM(o_orderpriority = '1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.ORDERS GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql index 1a13d2d5b..e194c3bd4 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql @@ -2,9 +2,9 @@ WITH _s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(IFF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, - SUM(IFF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, - SUM(IFF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + COUNT_IF(o_orderpriority = '1-URGENT') AS sum_expr, + COUNT_IF(o_orderpriority = '2-HIGH') AS sum_expr_21, + COUNT_IF(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql index 13f47047c..34e9c3fc0 100644 --- a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql +++ b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql @@ -2,9 +2,9 @@ WITH 
_s3 AS ( SELECT o_custkey, COUNT(*) AS n_rows, - SUM(IIF(o_orderpriority = '1-URGENT', 1, 0)) AS sum_expr, - SUM(IIF(o_orderpriority = '2-HIGH', 1, 0)) AS sum_expr_21, - SUM(IIF(o_orderpriority = '3-MEDIUM', 1, 0)) AS sum_expr_22 + SUM(o_orderpriority = '1-URGENT') AS sum_expr, + SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, + SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 FROM tpch.orders GROUP BY 1 diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql index af28c71cf..cde69334c 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_ansi.sql @@ -1,3 +1,3 @@ SELECT - SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio + SUM(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql index af28c71cf..cde69334c 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_mysql.sql @@ -1,3 +1,3 @@ SELECT - SUM(CASE WHEN rating > 4.5 THEN 1 ELSE 0 END) / COUNT(*) AS ratio + SUM(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql index 7a6db2f6b..f3e20bfa6 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen11_snowflake.sql @@ -1,3 +1,3 @@ SELECT - SUM(IFF(rating > 4.5, 1, 0)) / COUNT(*) AS ratio + COUNT_IF(rating > 4.5) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql index b99291f91..121904d46 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_restaurants_gen11_sqlite.sql @@ -1,3 +1,3 @@ SELECT - CAST(SUM(IIF(rating > 4.5, 1, 0)) AS REAL) / COUNT(*) AS ratio + CAST(SUM(rating > 4.5) AS REAL) / COUNT(*) AS ratio FROM main.restaurant diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql index d9975b8d2..586affd62 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql @@ -1,5 +1,11 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + SUM(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql index d9975b8d2..586affd62 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql @@ -1,5 +1,11 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + SUM(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql index 74d49c9a3..9d98b0324 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql @@ -1,5 +1,11 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN 
LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) AS ratio + CAST(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / CASE + WHEN ( + COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) + ) <> 0 + THEN COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql index 9f437c5c6..cfee256b5 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql @@ -1,5 +1,11 @@ SELECT - COUNT_IF(LOWER(food_type) = 'vegan') / NULLIF(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) AS ratio + COUNT_IF(LOWER(food_type) = 'vegan') / CASE + WHEN ( + COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index 06f824271..a4f7ac562 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -1,5 +1,11 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'vegan'), 0) AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio + CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / CASE + WHEN ( + COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ) <> 0 + THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') + ELSE NULL + END AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'san francisco' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql index c15bdb8d1..ee1edcfa8 100644 --- 
a/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_ansi.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio + SUM(LOWER(food_type) = 'italian') / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql index c15bdb8d1..ee1edcfa8 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_mysql.sql @@ -1,5 +1,5 @@ SELECT - COALESCE(SUM(LOWER(food_type) = 'italian'), 0) / NULLIF(COUNT(*), 0) AS ratio + SUM(LOWER(food_type) = 'italian') / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql index 5016a56a6..aaadabc97 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_postgres.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END), 0) AS DOUBLE PRECISION) / NULLIF(COUNT(*), 0) AS ratio + CAST(SUM(CASE WHEN LOWER(food_type) = 'italian' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / NULLIF(COUNT(*), 0) AS ratio FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql index c5dcfc7fb..ca869e50a 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen15_sqlite.sql @@ -1,5 +1,5 @@ SELECT - CAST(COALESCE(SUM(LOWER(food_type) = 'italian'), 0) AS REAL) / NULLIF(COUNT(*), 0) AS ratio + CAST(SUM(LOWER(food_type) = 'italian') AS REAL) / NULLIF(COUNT(*), 0) AS ratio 
FROM main.restaurant WHERE LOWER(city_name) = 'los angeles' diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql index cf572383b..3f8169add 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_ansi.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql index d3c25d5a9..48afc6331 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_mysql.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(CASE WHEN NOT restaurant.rating IS NULL THEN 1 ELSE 0 END) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql index de01bfd58..9041e014f 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen8_snowflake.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(IFF(NOT restaurant.rating IS NULL, 1, 0)) AS sum_expr, + COUNT_IF(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 diff --git a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql index 2ea3ca06a..f10e4066b 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql +++ 
b/tests/test_sql_refsols/defog_restaurants_gen8_sqlite.sql @@ -12,7 +12,7 @@ WITH _s1 AS ( ), _s7 AS ( SELECT _s3.region, - SUM(IIF(NOT restaurant.rating IS NULL, 1, 0)) AS sum_expr, + SUM(NOT restaurant.rating IS NULL) AS sum_expr, SUM(restaurant.rating) AS sum_rating FROM main.location AS location LEFT JOIN _s1 AS _s3 From 87eec09f02a3cd47d2cce30179521948f6522325 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:22:22 -0800 Subject: [PATCH 10/22] Removing old --- tests/test_pipeline_tpch_custom.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index d67037a54..4fbe05137 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2690,19 +2690,6 @@ ), id="quarter_function_test", ), - pytest.param( - PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", - "TPCH", - lambda: pd.DataFrame( - { - "n": [30183], - } - ), - "redundant_has", - ), - id="redundant_has", - ), pytest.param( PyDoughPandasTest( "result = TPCH.CALCULATE(" From 9fe336cf5db5b83c9d597199d5faa2d33197f54f Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 28 Jan 2026 23:51:23 -0800 Subject: [PATCH 11/22] Added extra BIRD tests --- pydough/conversion/hybrid_tree.py | 4 -- tests/conftest.py | 2 +- tests/test_pipeline_s3_datasets.py | 54 +++++++++++++++++++ tests/test_plan_refsols/donor_3276.txt | 4 ++ tests/test_plan_refsols/movielens_2274.txt | 6 +++ tests/test_sql_refsols/donor_3276_ansi.sql | 7 +++ tests/test_sql_refsols/donor_3276_mysql.sql | 7 +++ .../test_sql_refsols/donor_3276_postgres.sql | 7 +++ .../test_sql_refsols/donor_3276_snowflake.sql | 7 +++ tests/test_sql_refsols/donor_3276_sqlite.sql | 7 +++ .../test_sql_refsols/movielens_2274_ansi.sql | 11 ++++ .../test_sql_refsols/movielens_2274_mysql.sql | 22 ++++++++ .../movielens_2274_postgres.sql | 22 ++++++++ .../movielens_2274_snowflake.sql 
| 18 +++++++ .../movielens_2274_sqlite.sql | 22 ++++++++ 15 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 tests/test_plan_refsols/donor_3276.txt create mode 100644 tests/test_plan_refsols/movielens_2274.txt create mode 100644 tests/test_sql_refsols/donor_3276_ansi.sql create mode 100644 tests/test_sql_refsols/donor_3276_mysql.sql create mode 100644 tests/test_sql_refsols/donor_3276_postgres.sql create mode 100644 tests/test_sql_refsols/donor_3276_snowflake.sql create mode 100644 tests/test_sql_refsols/donor_3276_sqlite.sql create mode 100644 tests/test_sql_refsols/movielens_2274_ansi.sql create mode 100644 tests/test_sql_refsols/movielens_2274_mysql.sql create mode 100644 tests/test_sql_refsols/movielens_2274_postgres.sql create mode 100644 tests/test_sql_refsols/movielens_2274_snowflake.sql create mode 100644 tests/test_sql_refsols/movielens_2274_sqlite.sql diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index ab716efb5..aeea1bf38 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -980,10 +980,6 @@ def remove_dead_children(self, must_remove: set[int]) -> dict[int, int]: ) or self.children[child_idx].connection_type.is_anti: children_to_delete.discard(child_idx) - print() - print(self) - print(children_to_delete) - if len(children_to_delete) == 0: return {i: i for i in range(len(self.children))} diff --git a/tests/conftest.py b/tests/conftest.py index 8334590e0..cdbb7c47f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -682,7 +682,7 @@ def _impl(database_name: str) -> DatabaseContext: return _impl -S3_DATASETS = ["synthea", "world_development_indicators", "menu"] +S3_DATASETS = ["synthea", "world_development_indicators", "menu", "donor", "movielens"] """ Contains the name of all the custom datasets that will be used for testing. This includes the datasets from S3 and initialized with a .sql file. 
diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py index 9570029f2..e23be6a79 100644 --- a/tests/test_pipeline_s3_datasets.py +++ b/tests/test_pipeline_s3_datasets.py @@ -137,6 +137,60 @@ ), id="menu_5556", ), + pytest.param( + PyDoughPandasTest( + """ +result = donor.CALCULATE( + total_sb_projects=COUNT(projects.WHERE(LOWER(school_city) == "santa barbara")), + suburban_sb_projects=COUNT(projects.WHERE( + (LOWER(school_city) == "santa barbara") & + (LOWER(school_metro) == "suburban") + )) +).CALCULATE( + percentage_suburban=100.0 * suburban_sb_projects / total_sb_projects +) + """, + "donor", + lambda: pd.DataFrame( + { + "percentage_suburban": [30.303], + } + ), + "donor_3276", + ), + id="donor_3276", + ), + pytest.param( + PyDoughPandasTest( + """ +result = movielens.CALCULATE( + # Count all users who have at least one rating of 2 + total_users_with_rating_2 = COUNT(users.WHERE( + HAS(submitted_ratings.WHERE(rating == 2)) + )), + # Count female users who have at least one rating of 2 + female_users_with_rating_2 = COUNT(users.WHERE( + HAS(submitted_ratings.WHERE(rating == 2)) & (LOWER(u_gender) == 'f') + )) +).CALCULATE( + # Calculate the percentage, handling division by zero + percentage_of_female_users = IFF( + total_users_with_rating_2 > 0, + 100.0 * female_users_with_rating_2 / total_users_with_rating_2, + 0.0 + ) +) + """, + "movielens", + lambda: pd.DataFrame( + { + "percentage_of_female_users": [27.961], + } + ), + "movielens_2274", + ), + id="movielens_2274", + ), ], ) def s3_datasets_test_data(request) -> PyDoughPandasTest: diff --git a/tests/test_plan_refsols/donor_3276.txt b/tests/test_plan_refsols/donor_3276.txt new file mode 100644 index 000000000..ca4a91b96 --- /dev/null +++ b/tests/test_plan_refsols/donor_3276.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('percentage_suburban', 100.0:numeric * suburban_sb_projects / n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'suburban_sb_projects': 
SUM(LOWER(school_metro) == 'suburban':string)}) + FILTER(condition=LOWER(school_city) == 'santa barbara':string, columns={'school_metro': school_metro}) + SCAN(table=main.projects, columns={'school_city': school_city, 'school_metro': school_metro}) diff --git a/tests/test_plan_refsols/movielens_2274.txt b/tests/test_plan_refsols/movielens_2274.txt new file mode 100644 index 000000000..f16c77a47 --- /dev/null +++ b/tests/test_plan_refsols/movielens_2274.txt @@ -0,0 +1,6 @@ +ROOT(columns=[('percentage_of_female_users', IFF(n_rows > 0:numeric, 100.0:numeric * sum_expr / n_rows, 0.0:numeric))], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(LOWER(u_gender) == 'f':string)}) + JOIN(condition=t0.userid == t1.userid, type=SEMI, columns={'u_gender': t0.u_gender}) + SCAN(table=main.users, columns={'u_gender': u_gender, 'userid': userid}) + FILTER(condition=rating == 2:numeric, columns={'userid': userid}) + SCAN(table=main.u2base, columns={'rating': rating, 'userid': userid}) diff --git a/tests/test_sql_refsols/donor_3276_ansi.sql b/tests/test_sql_refsols/donor_3276_ansi.sql new file mode 100644 index 000000000..c180985e8 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_ansi.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_mysql.sql b/tests/test_sql_refsols/donor_3276_mysql.sql new file mode 100644 index 000000000..c180985e8 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_mysql.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_postgres.sql b/tests/test_sql_refsols/donor_3276_postgres.sql new file mode 100644 index 000000000..65e0b08d0 --- /dev/null +++ 
b/tests/test_sql_refsols/donor_3276_postgres.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * SUM(CASE WHEN LOWER(school_metro) = 'suburban' THEN 1 ELSE 0 END) + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_snowflake.sql b/tests/test_sql_refsols/donor_3276_snowflake.sql new file mode 100644 index 000000000..bd9c447e5 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + ( + 100.0 * COUNT_IF(LOWER(school_metro) = 'suburban') + ) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/donor_3276_sqlite.sql b/tests/test_sql_refsols/donor_3276_sqlite.sql new file mode 100644 index 000000000..2f303e9d6 --- /dev/null +++ b/tests/test_sql_refsols/donor_3276_sqlite.sql @@ -0,0 +1,7 @@ +SELECT + CAST(( + 100.0 * SUM(LOWER(school_metro) = 'suburban') + ) AS REAL) / COUNT(*) AS percentage_suburban +FROM main.projects +WHERE + LOWER(school_city) = 'santa barbara' diff --git a/tests/test_sql_refsols/movielens_2274_ansi.sql b/tests/test_sql_refsols/movielens_2274_ansi.sql new file mode 100644 index 000000000..24541f689 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_ansi.sql @@ -0,0 +1,11 @@ +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * SUM(LOWER(users.u_gender) = 'f') + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +JOIN main.u2base AS u2base + ON u2base.rating = 2 AND u2base.userid = users.userid diff --git a/tests/test_sql_refsols/movielens_2274_mysql.sql b/tests/test_sql_refsols/movielens_2274_mysql.sql new file mode 100644 index 000000000..4dcbd4e83 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_mysql.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * 
SUM(LOWER(users.u_gender) = 'f') + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_postgres.sql b/tests/test_sql_refsols/movielens_2274_postgres.sql new file mode 100644 index 000000000..7d41e8e1d --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_postgres.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + CASE + WHEN COUNT(*) > 0 + THEN ( + 100.0 * SUM(CASE WHEN LOWER(users.u_gender) = 'f' THEN 1 ELSE 0 END) + ) / COUNT(*) + ELSE 0.0 + END AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_snowflake.sql b/tests/test_sql_refsols/movielens_2274_snowflake.sql new file mode 100644 index 000000000..a76f7e516 --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_snowflake.sql @@ -0,0 +1,18 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + IFF(COUNT(*) > 0, ( + 100.0 * COUNT_IF(LOWER(users.u_gender) = 'f') + ) / COUNT(*), 0.0) AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/movielens_2274_sqlite.sql b/tests/test_sql_refsols/movielens_2274_sqlite.sql new file mode 100644 index 000000000..01d32927d --- /dev/null +++ b/tests/test_sql_refsols/movielens_2274_sqlite.sql @@ -0,0 +1,22 @@ +WITH _u_0 AS ( + SELECT + userid AS _u_1 + FROM main.u2base + WHERE + rating = 2 + GROUP BY + 1 +) +SELECT + IIF( + COUNT(*) > 0, + CAST(( + 100.0 * SUM(LOWER(users.u_gender) = 'f') + ) AS REAL) / COUNT(*), + 0.0 + ) AS percentage_of_female_users +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON 
_u_0._u_1 = users.userid +WHERE + NOT _u_0._u_1 IS NULL From 82371286d70bd8e29985cf297a13d73903636015 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 30 Jan 2026 13:25:04 -0800 Subject: [PATCH 12/22] Added more multiple_filter tests and removed the sql file testing --- tests/test_pipeline_tpch_custom.py | 335 +++++++++++++++++- .../count_multiple_filters_b.txt | 12 +- .../count_multiple_filters_c.txt | 11 +- .../count_multiple_filters_d.txt | 42 +-- .../count_multiple_filters_e.txt | 18 +- .../count_multiple_filters_f.txt | 3 + .../count_multiple_filters_g.txt | 36 ++ .../count_multiple_filters_h.txt | 10 + .../count_multiple_filters_i.txt | 11 + .../count_multiple_filters_j.txt | 19 + .../count_multiple_filters_k.txt | 20 ++ .../count_multiple_filters_l.txt | 19 + .../count_multiple_filters_m.txt | 20 ++ .../count_multiple_filters_n.txt | 20 ++ .../count_multiple_filters_o.txt | 20 ++ .../count_multiple_filters_p.txt | 20 ++ .../count_multiple_filters_a_ansi.sql | 6 - .../count_multiple_filters_a_mysql.sql | 6 - .../count_multiple_filters_a_postgres.sql | 6 - .../count_multiple_filters_a_snowflake.sql | 6 - .../count_multiple_filters_a_sqlite.sql | 6 - .../count_multiple_filters_b_ansi.sql | 26 -- .../count_multiple_filters_b_mysql.sql | 26 -- .../count_multiple_filters_b_postgres.sql | 26 -- .../count_multiple_filters_b_snowflake.sql | 26 -- .../count_multiple_filters_b_sqlite.sql | 26 -- .../count_multiple_filters_c_ansi.sql | 13 - .../count_multiple_filters_c_mysql.sql | 13 - .../count_multiple_filters_c_postgres.sql | 17 - .../count_multiple_filters_c_snowflake.sql | 13 - .../count_multiple_filters_c_sqlite.sql | 13 - .../count_multiple_filters_d_ansi.sql | 96 ----- .../count_multiple_filters_d_mysql.sql | 96 ----- .../count_multiple_filters_d_postgres.sql | 96 ----- .../count_multiple_filters_d_snowflake.sql | 95 ----- .../count_multiple_filters_d_sqlite.sql | 96 ----- .../count_multiple_filters_e_ansi.sql | 36 -- .../count_multiple_filters_e_mysql.sql 
| 36 -- .../count_multiple_filters_e_postgres.sql | 36 -- .../count_multiple_filters_e_snowflake.sql | 36 -- .../count_multiple_filters_e_sqlite.sql | 36 -- 41 files changed, 552 insertions(+), 952 deletions(-) create mode 100644 tests/test_plan_refsols/count_multiple_filters_f.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_g.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_h.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_i.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_j.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_k.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_l.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_m.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_n.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_o.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_p.txt delete mode 100644 tests/test_sql_refsols/count_multiple_filters_a_ansi.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_a_mysql.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_a_postgres.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_b_ansi.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_b_mysql.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_b_postgres.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_c_ansi.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_c_mysql.sql delete mode 100644 
tests/test_sql_refsols/count_multiple_filters_c_postgres.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_d_ansi.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_d_mysql.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_d_postgres.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_e_ansi.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_e_mysql.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_e_postgres.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql delete mode 100644 tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 5e2c848de..7da160a51 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2704,9 +2704,70 @@ } ), "count_multiple_filters_a", + skip_sql=True, ), id="count_multiple_filters_a", ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(MONOTONIC(500, account_balance, 600))\n" + "c2 = customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))\n" + "result = TPCH.CALCULATE(" + " n1=COUNT(c1), " + " n2=COUNT(c2), " + ").WHERE(HAS(c1))", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [268], + } + ), + "count_multiple_filters_b", + skip_sql=True, + ), + id="count_multiple_filters_b", + ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(MONOTONIC(500, account_balance, 600))\n" + "c2 = customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))\n" + "result = 
TPCH.CALCULATE(" + " n1=COUNT(c1), " + " n2=COUNT(c2), " + ").WHERE(HAS(c2))", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [268], + } + ), + "count_multiple_filters_c", + skip_sql=True, + ), + id="count_multiple_filters_c", + ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(MONOTONIC(500, account_balance, 600))\n" + "c2 = customers.WHERE((market_segment == 'BUILDING') & MONOTONIC(500, account_balance, 600))\n" + "result = TPCH.CALCULATE(" + " n1=COUNT(c1), " + " n2=COUNT(c2), " + ").WHERE(HAS(c1) & HAS(c2))", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1379], + "n2": [268], + } + ), + "count_multiple_filters_d", + skip_sql=True, + ), + id="count_multiple_filters_d", + ), pytest.param( PyDoughPandasTest( "result = TPCH.CALCULATE(" @@ -2728,9 +2789,10 @@ "n6": [19], } ), - "count_multiple_filters_b", + "count_multiple_filters_e", + skip_sql=True, ), - id="count_multiple_filters_b", + id="count_multiple_filters_e", ), pytest.param( PyDoughPandasTest( @@ -2753,9 +2815,10 @@ "n6": [19], } ), - "count_multiple_filters_c", + "count_multiple_filters_f", + skip_sql=True, ), - id="count_multiple_filters_c", + id="count_multiple_filters_f", ), pytest.param( PyDoughPandasTest( @@ -2785,9 +2848,10 @@ "n6": [6024], } ), - "count_multiple_filters_d", + "count_multiple_filters_g", + skip_sql=True, ), - id="count_multiple_filters_d", + id="count_multiple_filters_g", ), pytest.param( PyDoughPandasTest( @@ -2816,9 +2880,264 @@ "n5": [59597, 59230, 60485, 60375, 59036], } ), - "count_multiple_filters_e", + "count_multiple_filters_h", + skip_sql=True, ), - id="count_multiple_filters_e", + id="count_multiple_filters_h", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders), " + " n3=COUNT(nations.customers.orders.WHERE(order_priority == '1-URGENT')), " + " n4=COUNT(nations.customers.orders.WHERE(order_priority == '2-HIGH')), " + " 
n5=COUNT(nations.customers.orders.WHERE(order_priority == '3-MEDIUM')), " + ").WHERE(HAS(nations.customers.orders.WHERE(order_priority == '2-HIGH')))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [298994, 299103, 301740, 303286, 296877], + "n3": [59767, 59902, 60166, 60373, 60135], + "n4": [59511, 60232, 60246, 60901, 59201], + "n5": [59597, 59230, 60485, 60375, 59036], + } + ), + "count_multiple_filters_i", + skip_sql=True, + ), + id="count_multiple_filters_i", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders.WHERE(order_priority == '1-URGENT')), " + " n3=COUNT(nations.customers.orders.WHERE(order_priority == '2-HIGH')), " + " n4=COUNT(nations.customers.orders.WHERE(order_priority == '3-MEDIUM')), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [59767, 59902, 60166, 60373, 60135], + "n3": [59511, 60232, 60246, 60901, 59201], + "n4": [59597, 59230, 60485, 60375, 59036], + } + ), + "count_multiple_filters_j", + skip_sql=True, + ), + id="count_multiple_filters_j", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders.WHERE(order_priority == '1-URGENT')), " + " n3=COUNT(nations.customers.orders.WHERE(order_priority == '2-HIGH')), " + " n4=COUNT(nations.customers.orders.WHERE(order_priority == '3-MEDIUM')), " + ").WHERE(HAS(nations.customers.orders.WHERE(order_priority == '1-URGENT')))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": 
[59767, 59902, 60166, 60373, 60135], + "n3": [59511, 60232, 60246, 60901, 59201], + "n4": [59597, 59230, 60485, 60375, 59036], + } + ), + "count_multiple_filters_k", + skip_sql=True, + ), + id="count_multiple_filters_k", + ), + pytest.param( + PyDoughPandasTest( + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(nations.customers.orders.WHERE((order_priority == '1-URGENT') | (order_priority == '2-HIGH'))), " + " n3=COUNT(nations.customers.orders.WHERE((order_priority == '2-HIGH') | (order_priority == '3-MEDIUM'))), " + " n4=COUNT(nations.customers.orders.WHERE((order_priority == '3-MEDIUM') | (order_priority == '4-NOT SPECIFIED'))), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [119278, 120134, 120412, 121274, 119336], + "n3": [119108, 119462, 120731, 121276, 118237], + "n4": [119665, 119193, 121015, 121129, 117975], + } + ), + "count_multiple_filters_l", + skip_sql=True, + ), + id="count_multiple_filters_l", + ), + pytest.param( + PyDoughPandasTest( + "c1 = nations.customers.orders.WHERE((order_priority == '1-URGENT') | (order_priority == '2-HIGH'))\n" + "c2 = nations.customers.orders.WHERE((order_priority == '2-HIGH') | (order_priority == '3-MEDIUM'))\n" + "c3 = nations.customers.orders.WHERE((order_priority == '3-MEDIUM') | (order_priority == '4-NOT SPECIFIED'))\n" + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(c1), " + " n3=COUNT(c2), " + " n4=COUNT(c3), " + ").WHERE(HAS(c1))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [119278, 120134, 120412, 121274, 119336], + "n3": [119108, 119462, 120731, 121276, 118237], + "n4": [119665, 119193, 121015, 121129, 117975], + } + ), + 
"count_multiple_filters_m", + skip_sql=True, + ), + id="count_multiple_filters_m", + ), + pytest.param( + PyDoughPandasTest( + "c1 = nations.customers.orders.WHERE((order_priority == '1-URGENT') | (order_priority == '2-HIGH'))\n" + "c2 = nations.customers.orders.WHERE((order_priority == '2-HIGH') | (order_priority == '3-MEDIUM'))\n" + "c3 = nations.customers.orders.WHERE((order_priority == '3-MEDIUM') | (order_priority == '4-NOT SPECIFIED'))\n" + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(c1), " + " n3=COUNT(c2), " + " n4=COUNT(c3), " + ").WHERE(HAS(c1) & HAS(c2))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [119278, 120134, 120412, 121274, 119336], + "n3": [119108, 119462, 120731, 121276, 118237], + "n4": [119665, 119193, 121015, 121129, 117975], + } + ), + "count_multiple_filters_n", + skip_sql=True, + ), + id="count_multiple_filters_n", + ), + pytest.param( + PyDoughPandasTest( + "c1 = nations.customers.orders.WHERE((order_priority == '1-URGENT') | (order_priority == '2-HIGH'))\n" + "c2 = nations.customers.orders.WHERE((order_priority == '2-HIGH') | (order_priority == '3-MEDIUM'))\n" + "c3 = nations.customers.orders.WHERE((order_priority == '3-MEDIUM') | (order_priority == '4-NOT SPECIFIED'))\n" + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(c1), " + " n3=COUNT(c2), " + " n4=COUNT(c3), " + ").WHERE(HAS(c1) & HAS(c3))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [119278, 120134, 120412, 121274, 119336], + "n3": [119108, 119462, 120731, 121276, 118237], + "n4": [119665, 119193, 121015, 121129, 117975], + } + ), + "count_multiple_filters_o", + skip_sql=True, + ), + 
id="count_multiple_filters_o", + ), + pytest.param( + PyDoughPandasTest( + "c1 = nations.customers.orders.WHERE((order_priority == '1-URGENT') | (order_priority == '2-HIGH'))\n" + "c2 = nations.customers.orders.WHERE((order_priority == '2-HIGH') | (order_priority == '3-MEDIUM'))\n" + "c3 = nations.customers.orders.WHERE((order_priority == '3-MEDIUM') | (order_priority == '4-NOT SPECIFIED'))\n" + "result = regions.CALCULATE(" + " region_name=name, " + " n1=COUNT(nations.customers), " + " n2=COUNT(c1), " + " n3=COUNT(c2), " + " n4=COUNT(c3), " + ").WHERE(HAS(c1) & HAS(c2) & HAS(c3))", + "TPCH", + lambda: pd.DataFrame( + { + "region_name": [ + "AFRICA", + "AMERICA", + "ASIA", + "EUROPE", + "MIDDLE EAST", + ], + "n1": [29764, 29952, 30183, 30197, 29904], + "n2": [119278, 120134, 120412, 121274, 119336], + "n3": [119108, 119462, 120731, 121276, 118237], + "n4": [119665, 119193, 121015, 121129, 117975], + } + ), + "count_multiple_filters_p", + skip_sql=True, + ), + id="count_multiple_filters_p", ), pytest.param( PyDoughPandasTest( diff --git a/tests/test_plan_refsols/count_multiple_filters_b.txt b/tests/test_plan_refsols/count_multiple_filters_b.txt index 3ebaf6592..58fde44d6 100644 --- a/tests/test_plan_refsols/count_multiple_filters_b.txt +++ b/tests/test_plan_refsols/count_multiple_filters_b.txt @@ -1,8 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) - 
FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) - FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) - SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string)}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index fed82fccd..7fd785d58 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,3 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) +ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) + JOIN(condition=True:bool, type=INNER, 
cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n2': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_d.txt b/tests/test_plan_refsols/count_multiple_filters_d.txt index b80d0ef1c..7fd785d58 100644 --- a/tests/test_plan_refsols/count_multiple_filters_d.txt +++ b/tests/test_plan_refsols/count_multiple_filters_d.txt @@ -1,36 +1,8 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_agg_1': t1.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) 
- JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n2': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt index b67bcbb02..3ebaf6592 100644 --- a/tests/test_plan_refsols/count_multiple_filters_e.txt +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -1,10 +1,8 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_21, 0:numeric)), ('n5', DEFAULT_TO(sum_sum_expr_22, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_21': t1.sum_sum_expr_21, 'sum_sum_expr_22': t1.sum_sum_expr_22}) - SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_21': SUM(sum_expr_21), 'sum_sum_expr_22': SUM(sum_expr_22)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_21': t1.sum_expr_21, 'sum_expr_22': t1.sum_expr_22}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_21': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_22': SUM(o_orderpriority == '3-MEDIUM':string)}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) + AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) + FILTER(condition=c_mktsegment == 'BUILDING':string, columns={'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_f.txt b/tests/test_plan_refsols/count_multiple_filters_f.txt new file mode 100644 index 000000000..fed82fccd --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_f.txt @@ -0,0 +1,3 @@ +ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_g.txt b/tests/test_plan_refsols/count_multiple_filters_g.txt new file mode 100644 index 000000000..b80d0ef1c --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_g.txt @@ -0,0 +1,36 @@ 
+ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_agg_1': t1.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + 
JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_h.txt 
b/tests/test_plan_refsols/count_multiple_filters_h.txt new file mode 100644 index 000000000..b67bcbb02 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_h.txt @@ -0,0 +1,10 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_21, 0:numeric)), ('n5', DEFAULT_TO(sum_sum_expr_22, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_21': t1.sum_sum_expr_21, 'sum_sum_expr_22': t1.sum_sum_expr_22}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_21': SUM(sum_expr_21), 'sum_sum_expr_22': SUM(sum_expr_22)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_21': t1.sum_expr_21, 'sum_expr_22': t1.sum_expr_22}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_21': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_22': SUM(o_orderpriority == '3-MEDIUM':string)}) + SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_i.txt b/tests/test_plan_refsols/count_multiple_filters_i.txt new file mode 100644 index 000000000..452b5611d --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_i.txt @@ -0,0 +1,11 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_n_rows, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', sum_sum_expr_21), ('n5', DEFAULT_TO(sum_sum_expr_22, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_21': t1.sum_sum_expr_21, 'sum_sum_expr_22': t1.sum_sum_expr_22}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=sum_sum_expr_21 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_21': sum_sum_expr_21, 'sum_sum_expr_22': sum_sum_expr_22}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_21': SUM(sum_expr_21), 'sum_sum_expr_22': SUM(sum_expr_22)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_21': t1.sum_expr_21, 'sum_expr_22': t1.sum_expr_22}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_21': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_22': SUM(o_orderpriority == '3-MEDIUM':string)}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_j.txt b/tests/test_plan_refsols/count_multiple_filters_j.txt new file mode 100644 index 000000000..0778fc83d --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_j.txt @@ -0,0 +1,19 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr_5, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 
'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_k.txt b/tests/test_plan_refsols/count_multiple_filters_k.txt new file mode 100644 index 000000000..e0c244bd8 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_k.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) 
+ FILTER(condition=sum_expr_5 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + 
FILTER(condition=o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_l.txt b/tests/test_plan_refsols/count_multiple_filters_l.txt new file mode 100644 index 000000000..0f36d4f4b --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_l.txt @@ -0,0 +1,19 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr_5, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 
'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_m.txt b/tests/test_plan_refsols/count_multiple_filters_m.txt new file mode 100644 index 000000000..8e90879f9 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_m.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=sum_expr_5 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': 
n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + 
FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_n.txt b/tests/test_plan_refsols/count_multiple_filters_n.txt new file mode 100644 index 000000000..4d6be32e7 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_n.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_8), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=sum_expr_5 != 0:numeric & sum_expr_8 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': 
t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_o.txt b/tests/test_plan_refsols/count_multiple_filters_o.txt new file mode 100644 index 000000000..c3406625f --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_o.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', sum_n_rows)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': 
t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=sum_expr_5 != 0:numeric & sum_n_rows != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, 
columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_p.txt b/tests/test_plan_refsols/count_multiple_filters_p.txt new file mode 100644 index 000000000..8a1408be8 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_p.txt @@ -0,0 +1,20 @@ +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_8), ('n4', sum_n_rows)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) + FILTER(condition=sum_expr_5 != 0:numeric & sum_expr_8 != 0:numeric & sum_n_rows != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': 
t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql deleted file mode 100644 index 5a423e7e5..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_a_ansi.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2 -FROM tpch.customer -WHERE - c_acctbal <= 600 AND c_acctbal >= 500 diff --git 
a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql deleted file mode 100644 index 184cf2b26..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_a_mysql.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2 -FROM tpch.CUSTOMER -WHERE - c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql deleted file mode 100644 index ba0018b62..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_a_postgres.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2 -FROM tpch.customer -WHERE - c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql deleted file mode 100644 index 7150be416..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_a_snowflake.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - COUNT(*) AS n1, - COUNT_IF(c_mktsegment = 'BUILDING') AS n2 -FROM tpch.customer -WHERE - c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql deleted file mode 100644 index 5a423e7e5..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_a_sqlite.sql +++ /dev/null @@ -1,6 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2 -FROM tpch.customer -WHERE - c_acctbal <= 600 AND c_acctbal >= 500 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql deleted file mode 100644 index 4f96f56d6..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_b_ansi.sql +++ /dev/null @@ -1,26 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS 
n_rows, - SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 - FROM tpch.customer - WHERE - c_acctbal <= 600 AND c_acctbal >= 500 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr - FROM tpch.customer - WHERE - c_mktsegment = 'BUILDING' -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, - _s0.sum_expr_12 AS n6 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql deleted file mode 100644 index 79cd17fa8..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_b_mysql.sql +++ /dev/null @@ -1,26 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 - FROM tpch.CUSTOMER - WHERE - c_acctbal <= 600 AND c_acctbal >= 500 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr - FROM tpch.CUSTOMER - WHERE - c_mktsegment = 'BUILDING' -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, - _s0.sum_expr_12 AS n6 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql deleted file mode 100644 index 8ce688027..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_b_postgres.sql +++ /dev/null @@ -1,26 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr_11, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS 
sum_expr_12 - FROM tpch.customer - WHERE - c_acctbal <= 600 AND c_acctbal >= 500 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 0 END) AS sum_expr - FROM tpch.customer - WHERE - c_mktsegment = 'BUILDING' -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, - _s0.sum_expr_12 AS n6 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql deleted file mode 100644 index a6a0d4dfd..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_b_snowflake.sql +++ /dev/null @@ -1,26 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - COUNT_IF(c_mktsegment = 'BUILDING') AS sum_expr, - COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr_11, - COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS sum_expr_12 - FROM tpch.customer - WHERE - c_acctbal <= 600 AND c_acctbal >= 500 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows, - COUNT_IF(STARTSWITH(c_phone, '11')) AS sum_expr - FROM tpch.customer - WHERE - c_mktsegment = 'BUILDING' -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, - _s0.sum_expr_12 AS n6 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql deleted file mode 100644 index 4f96f56d6..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_b_sqlite.sql +++ /dev/null @@ -1,26 +0,0 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(c_mktsegment = 'BUILDING') AS sum_expr, - SUM(c_phone LIKE '11%') AS sum_expr_11, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS sum_expr_12 - FROM tpch.customer - WHERE - c_acctbal <= 600 AND c_acctbal >= 500 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows, - SUM(c_phone LIKE '11%') AS sum_expr - 
FROM tpch.customer - WHERE - c_mktsegment = 'BUILDING' -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s0.sum_expr AS n3, - _s0.sum_expr_11 AS n4, - _s1.sum_expr AS n5, - _s0.sum_expr_12 AS n6 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql deleted file mode 100644 index 1d410f585..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_c_ansi.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2, - SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, - SUM(c_phone LIKE '11%') AS n4, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, - SUM( - c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - ) AS n6 -FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql deleted file mode 100644 index 2926641ca..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_c_mysql.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2, - SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, - SUM(c_phone LIKE '11%') AS n4, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, - SUM( - c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - ) AS n6 -FROM tpch.CUSTOMER diff --git a/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql deleted file mode 100644 index 3861720ce..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_c_postgres.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(CASE WHEN c_mktsegment = 'BUILDING' THEN 1 ELSE 0 END) AS n2, - SUM(CASE WHEN c_acctbal <= 600 AND c_acctbal >= 500 THEN 1 ELSE 0 END) AS n3, - SUM(CASE WHEN c_phone LIKE '11%' THEN 1 ELSE 
0 END) AS n4, - SUM(CASE WHEN c_mktsegment = 'BUILDING' AND c_phone LIKE '11%' THEN 1 ELSE 0 END) AS n5, - SUM( - CASE - WHEN c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - THEN 1 - ELSE 0 - END - ) AS n6 -FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql deleted file mode 100644 index dc9d70b1d..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_c_snowflake.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - COUNT(*) AS n1, - COUNT_IF(c_mktsegment = 'BUILDING') AS n2, - COUNT_IF(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, - COUNT_IF(STARTSWITH(c_phone, '11')) AS n4, - COUNT_IF(STARTSWITH(c_phone, '11') AND c_mktsegment = 'BUILDING') AS n5, - COUNT_IF( - STARTSWITH(c_phone, '11') - AND c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - ) AS n6 -FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql deleted file mode 100644 index 1d410f585..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_c_sqlite.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT - COUNT(*) AS n1, - SUM(c_mktsegment = 'BUILDING') AS n2, - SUM(c_acctbal <= 600 AND c_acctbal >= 500) AS n3, - SUM(c_phone LIKE '11%') AS n4, - SUM(c_mktsegment = 'BUILDING' AND c_phone LIKE '11%') AS n5, - SUM( - c_acctbal <= 600 - AND c_acctbal >= 500 - AND c_mktsegment = 'BUILDING' - AND c_phone LIKE '11%' - ) AS n6 -FROM tpch.customer diff --git a/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql deleted file mode 100644 index 479f86891..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_d_ansi.sql +++ /dev/null @@ -1,96 +0,0 @@ -WITH _t1 AS ( - SELECT - 1 AS "_" - FROM tpch.customer - QUALIFY - NTILE(100) OVER (ORDER BY c_acctbal NULLS LAST) = 100 -), _s2 
AS ( - SELECT - COUNT(*) AS n_rows - FROM _t1 -), _s0 AS ( - SELECT - c_acctbal, - c_nationkey - FROM tpch.customer -), _t6 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'GERMANY' -), _t4 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s0 - JOIN _t6 AS _t6 - ON _s0.c_nationkey = _t6.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s0.c_acctbal NULLS LAST) = 100 -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t4 -), _s4 AS ( - SELECT - c_nationkey - FROM tpch.customer -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s4 - JOIN _t6 AS _t8 - ON _s4.c_nationkey = _t8.n_nationkey -), _t12 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'CHINA' -), _t10 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s8 - JOIN _t12 AS _t12 - ON _s8.c_nationkey = _t12.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s8.c_acctbal NULLS LAST) = 100 -), _s11 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t10 -), _t14 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s12 - JOIN tpch.nation AS nation - ON _s12.c_nationkey = nation.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s12.c_acctbal NULLS LAST) = 100 - AND nation.n_name = 'CHINA' -), _s15 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t14 -), _s19 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s16 - JOIN _t12 AS _t17 - ON _s16.c_nationkey = _t17.n_nationkey -) -SELECT - _s2.n_rows AS n1, - _s3.n_rows AS n2, - _s7.n_rows AS n3, - _s11.n_rows AS n4, - _s15.n_rows AS n5, - _s19.n_rows AS n6 -FROM _s2 AS _s2 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s7 AS _s7 -CROSS JOIN _s11 AS _s11 -CROSS JOIN _s15 AS _s15 -CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql deleted file mode 100644 index faae8bbfb..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_d_mysql.sql +++ /dev/null @@ -1,96 +0,0 @@ -WITH _t AS ( - SELECT - NTILE(100) OVER (ORDER BY CASE WHEN c_acctbal IS NULL THEN 1 ELSE 0 
END, c_acctbal) AS _w - FROM tpch.CUSTOMER -), _s2 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t - WHERE - _w = 100 -), _s0 AS ( - SELECT - c_acctbal, - c_nationkey - FROM tpch.CUSTOMER -), _t6 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.NATION - WHERE - n_name = 'GERMANY' -), _t_2 AS ( - SELECT - NTILE(100) OVER (ORDER BY CASE WHEN _s0.c_acctbal IS NULL THEN 1 ELSE 0 END, _s0.c_acctbal) AS _w - FROM _s0 AS _s0 - JOIN _t6 AS _t6 - ON _s0.c_nationkey = _t6.n_nationkey -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_2 - WHERE - _w = 100 -), _s4 AS ( - SELECT - c_nationkey - FROM tpch.CUSTOMER -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s4 - JOIN _t6 AS _t8 - ON _s4.c_nationkey = _t8.n_nationkey -), _t12 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.NATION - WHERE - n_name = 'CHINA' -), _t_3 AS ( - SELECT - NTILE(100) OVER (ORDER BY CASE WHEN _s8.c_acctbal IS NULL THEN 1 ELSE 0 END, _s8.c_acctbal) AS _w - FROM _s0 AS _s8 - JOIN _t12 AS _t12 - ON _s8.c_nationkey = _t12.n_nationkey -), _s11 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_3 - WHERE - _w = 100 -), _t_4 AS ( - SELECT - NATION.n_name, - NTILE(100) OVER (ORDER BY CASE WHEN _s12.c_acctbal IS NULL THEN 1 ELSE 0 END, _s12.c_acctbal) AS _w - FROM _s0 AS _s12 - JOIN tpch.NATION AS NATION - ON NATION.n_nationkey = _s12.c_nationkey -), _s15 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_4 - WHERE - _w = 100 AND n_name = 'CHINA' -), _s19 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s16 - JOIN _t12 AS _t17 - ON _s16.c_nationkey = _t17.n_nationkey -) -SELECT - _s2.n_rows AS n1, - _s3.n_rows AS n2, - _s7.n_rows AS n3, - _s11.n_rows AS n4, - _s15.n_rows AS n5, - _s19.n_rows AS n6 -FROM _s2 AS _s2 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s7 AS _s7 -CROSS JOIN _s11 AS _s11 -CROSS JOIN _s15 AS _s15 -CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql deleted file mode 100644 index 
869decb54..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_d_postgres.sql +++ /dev/null @@ -1,96 +0,0 @@ -WITH _t AS ( - SELECT - NTILE(100) OVER (ORDER BY c_acctbal) AS _w - FROM tpch.customer -), _s2 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t - WHERE - _w = 100 -), _s0 AS ( - SELECT - c_acctbal, - c_nationkey - FROM tpch.customer -), _t6 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'GERMANY' -), _t_2 AS ( - SELECT - NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w - FROM _s0 AS _s0 - JOIN _t6 AS _t6 - ON _s0.c_nationkey = _t6.n_nationkey -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_2 - WHERE - _w = 100 -), _s4 AS ( - SELECT - c_nationkey - FROM tpch.customer -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s4 - JOIN _t6 AS _t8 - ON _s4.c_nationkey = _t8.n_nationkey -), _t12 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'CHINA' -), _t_3 AS ( - SELECT - NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w - FROM _s0 AS _s8 - JOIN _t12 AS _t12 - ON _s8.c_nationkey = _t12.n_nationkey -), _s11 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_3 - WHERE - _w = 100 -), _t_4 AS ( - SELECT - nation.n_name, - NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w - FROM _s0 AS _s12 - JOIN tpch.nation AS nation - ON _s12.c_nationkey = nation.n_nationkey -), _s15 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_4 - WHERE - _w = 100 AND n_name = 'CHINA' -), _s19 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s16 - JOIN _t12 AS _t17 - ON _s16.c_nationkey = _t17.n_nationkey -) -SELECT - _s2.n_rows AS n1, - _s3.n_rows AS n2, - _s7.n_rows AS n3, - _s11.n_rows AS n4, - _s15.n_rows AS n5, - _s19.n_rows AS n6 -FROM _s2 AS _s2 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s7 AS _s7 -CROSS JOIN _s11 AS _s11 -CROSS JOIN _s15 AS _s15 -CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql deleted file mode 100644 index 
4dbe53347..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_d_snowflake.sql +++ /dev/null @@ -1,95 +0,0 @@ -WITH _t1 AS ( - SELECT - 1 AS "_" - FROM tpch.customer - QUALIFY - NTILE(100) OVER (ORDER BY c_acctbal) = 100 -), _s2 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t1 -), _s0 AS ( - SELECT - c_acctbal, - c_nationkey - FROM tpch.customer -), _t6 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'GERMANY' -), _t4 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s0 - JOIN _t6 AS _t6 - ON _s0.c_nationkey = _t6.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s0.c_acctbal) = 100 -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t4 -), _s4 AS ( - SELECT - c_nationkey - FROM tpch.customer -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s4 - JOIN _t6 AS _t8 - ON _s4.c_nationkey = _t8.n_nationkey -), _t12 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'CHINA' -), _t10 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s8 - JOIN _t12 AS _t12 - ON _s8.c_nationkey = _t12.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s8.c_acctbal) = 100 -), _s11 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t10 -), _t14 AS ( - SELECT - 1 AS "_" - FROM _s0 AS _s12 - JOIN tpch.nation AS nation - ON _s12.c_nationkey = nation.n_nationkey - QUALIFY - NTILE(100) OVER (ORDER BY _s12.c_acctbal) = 100 AND nation.n_name = 'CHINA' -), _s15 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t14 -), _s19 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s16 - JOIN _t12 AS _t17 - ON _s16.c_nationkey = _t17.n_nationkey -) -SELECT - _s2.n_rows AS n1, - _s3.n_rows AS n2, - _s7.n_rows AS n3, - _s11.n_rows AS n4, - _s15.n_rows AS n5, - _s19.n_rows AS n6 -FROM _s2 AS _s2 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s7 AS _s7 -CROSS JOIN _s11 AS _s11 -CROSS JOIN _s15 AS _s15 -CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql deleted file mode 100644 index 
869decb54..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_d_sqlite.sql +++ /dev/null @@ -1,96 +0,0 @@ -WITH _t AS ( - SELECT - NTILE(100) OVER (ORDER BY c_acctbal) AS _w - FROM tpch.customer -), _s2 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t - WHERE - _w = 100 -), _s0 AS ( - SELECT - c_acctbal, - c_nationkey - FROM tpch.customer -), _t6 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'GERMANY' -), _t_2 AS ( - SELECT - NTILE(100) OVER (ORDER BY _s0.c_acctbal) AS _w - FROM _s0 AS _s0 - JOIN _t6 AS _t6 - ON _s0.c_nationkey = _t6.n_nationkey -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_2 - WHERE - _w = 100 -), _s4 AS ( - SELECT - c_nationkey - FROM tpch.customer -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s4 - JOIN _t6 AS _t8 - ON _s4.c_nationkey = _t8.n_nationkey -), _t12 AS ( - SELECT - n_name, - n_nationkey - FROM tpch.nation - WHERE - n_name = 'CHINA' -), _t_3 AS ( - SELECT - NTILE(100) OVER (ORDER BY _s8.c_acctbal) AS _w - FROM _s0 AS _s8 - JOIN _t12 AS _t12 - ON _s8.c_nationkey = _t12.n_nationkey -), _s11 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_3 - WHERE - _w = 100 -), _t_4 AS ( - SELECT - nation.n_name, - NTILE(100) OVER (ORDER BY _s12.c_acctbal) AS _w - FROM _s0 AS _s12 - JOIN tpch.nation AS nation - ON _s12.c_nationkey = nation.n_nationkey -), _s15 AS ( - SELECT - COUNT(*) AS n_rows - FROM _t_4 - WHERE - _w = 100 AND n_name = 'CHINA' -), _s19 AS ( - SELECT - COUNT(*) AS n_rows - FROM _s4 AS _s16 - JOIN _t12 AS _t17 - ON _s16.c_nationkey = _t17.n_nationkey -) -SELECT - _s2.n_rows AS n1, - _s3.n_rows AS n2, - _s7.n_rows AS n3, - _s11.n_rows AS n4, - _s15.n_rows AS n5, - _s19.n_rows AS n6 -FROM _s2 AS _s2 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s7 AS _s7 -CROSS JOIN _s11 AS _s11 -CROSS JOIN _s15 AS _s15 -CROSS JOIN _s19 AS _s19 diff --git a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql b/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql deleted file mode 100644 index 
34e9c3fc0..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_e_ansi.sql +++ /dev/null @@ -1,36 +0,0 @@ -WITH _s3 AS ( - SELECT - o_custkey, - COUNT(*) AS n_rows, - SUM(o_orderpriority = '1-URGENT') AS sum_expr, - SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, - SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 - FROM tpch.orders - GROUP BY - 1 -), _s5 AS ( - SELECT - nation.n_regionkey, - COUNT(*) AS n_rows, - SUM(_s3.n_rows) AS sum_n_rows, - SUM(_s3.sum_expr) AS sum_sum_expr, - SUM(_s3.sum_expr_21) AS sum_sum_expr_21, - SUM(_s3.sum_expr_22) AS sum_sum_expr_22 - FROM tpch.nation AS nation - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey - GROUP BY - 1 -) -SELECT - region.r_name AS region_name, - _s5.n_rows AS n1, - COALESCE(_s5.sum_n_rows, 0) AS n2, - COALESCE(_s5.sum_sum_expr, 0) AS n3, - COALESCE(_s5.sum_sum_expr_21, 0) AS n4, - COALESCE(_s5.sum_sum_expr_22, 0) AS n5 -FROM tpch.region AS region -JOIN _s5 AS _s5 - ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql b/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql deleted file mode 100644 index eb63b410b..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_e_mysql.sql +++ /dev/null @@ -1,36 +0,0 @@ -WITH _s3 AS ( - SELECT - o_custkey, - COUNT(*) AS n_rows, - SUM(o_orderpriority = '1-URGENT') AS sum_expr, - SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, - SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 - FROM tpch.ORDERS - GROUP BY - 1 -), _s5 AS ( - SELECT - NATION.n_regionkey, - COUNT(*) AS n_rows, - SUM(_s3.n_rows) AS sum_n_rows, - SUM(_s3.sum_expr) AS sum_sum_expr, - SUM(_s3.sum_expr_21) AS sum_sum_expr_21, - SUM(_s3.sum_expr_22) AS sum_sum_expr_22 - FROM tpch.NATION AS NATION - JOIN tpch.CUSTOMER AS CUSTOMER - ON CUSTOMER.c_nationkey = NATION.n_nationkey - LEFT JOIN _s3 AS _s3 - ON CUSTOMER.c_custkey = _s3.o_custkey - GROUP BY 
- 1 -) -SELECT - REGION.r_name AS region_name, - _s5.n_rows AS n1, - COALESCE(_s5.sum_n_rows, 0) AS n2, - COALESCE(_s5.sum_sum_expr, 0) AS n3, - COALESCE(_s5.sum_sum_expr_21, 0) AS n4, - COALESCE(_s5.sum_sum_expr_22, 0) AS n5 -FROM tpch.REGION AS REGION -JOIN _s5 AS _s5 - ON REGION.r_regionkey = _s5.n_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql b/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql deleted file mode 100644 index 676f8e1d1..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_e_postgres.sql +++ /dev/null @@ -1,36 +0,0 @@ -WITH _s3 AS ( - SELECT - o_custkey, - COUNT(*) AS n_rows, - SUM(CASE WHEN o_orderpriority = '1-URGENT' THEN 1 ELSE 0 END) AS sum_expr, - SUM(CASE WHEN o_orderpriority = '2-HIGH' THEN 1 ELSE 0 END) AS sum_expr_21, - SUM(CASE WHEN o_orderpriority = '3-MEDIUM' THEN 1 ELSE 0 END) AS sum_expr_22 - FROM tpch.orders - GROUP BY - 1 -), _s5 AS ( - SELECT - nation.n_regionkey, - COUNT(*) AS n_rows, - SUM(_s3.n_rows) AS sum_n_rows, - SUM(_s3.sum_expr) AS sum_sum_expr, - SUM(_s3.sum_expr_21) AS sum_sum_expr_21, - SUM(_s3.sum_expr_22) AS sum_sum_expr_22 - FROM tpch.nation AS nation - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey - GROUP BY - 1 -) -SELECT - region.r_name AS region_name, - _s5.n_rows AS n1, - COALESCE(_s5.sum_n_rows, 0) AS n2, - COALESCE(_s5.sum_sum_expr, 0) AS n3, - COALESCE(_s5.sum_sum_expr_21, 0) AS n4, - COALESCE(_s5.sum_sum_expr_22, 0) AS n5 -FROM tpch.region AS region -JOIN _s5 AS _s5 - ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql b/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql deleted file mode 100644 index e194c3bd4..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_e_snowflake.sql +++ /dev/null @@ -1,36 +0,0 @@ -WITH _s3 AS ( - SELECT - o_custkey, - COUNT(*) AS n_rows, - 
COUNT_IF(o_orderpriority = '1-URGENT') AS sum_expr, - COUNT_IF(o_orderpriority = '2-HIGH') AS sum_expr_21, - COUNT_IF(o_orderpriority = '3-MEDIUM') AS sum_expr_22 - FROM tpch.orders - GROUP BY - 1 -), _s5 AS ( - SELECT - nation.n_regionkey, - COUNT(*) AS n_rows, - SUM(_s3.n_rows) AS sum_n_rows, - SUM(_s3.sum_expr) AS sum_sum_expr, - SUM(_s3.sum_expr_21) AS sum_sum_expr_21, - SUM(_s3.sum_expr_22) AS sum_sum_expr_22 - FROM tpch.nation AS nation - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey - GROUP BY - 1 -) -SELECT - region.r_name AS region_name, - _s5.n_rows AS n1, - COALESCE(_s5.sum_n_rows, 0) AS n2, - COALESCE(_s5.sum_sum_expr, 0) AS n3, - COALESCE(_s5.sum_sum_expr_21, 0) AS n4, - COALESCE(_s5.sum_sum_expr_22, 0) AS n5 -FROM tpch.region AS region -JOIN _s5 AS _s5 - ON _s5.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql b/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql deleted file mode 100644 index 34e9c3fc0..000000000 --- a/tests/test_sql_refsols/count_multiple_filters_e_sqlite.sql +++ /dev/null @@ -1,36 +0,0 @@ -WITH _s3 AS ( - SELECT - o_custkey, - COUNT(*) AS n_rows, - SUM(o_orderpriority = '1-URGENT') AS sum_expr, - SUM(o_orderpriority = '2-HIGH') AS sum_expr_21, - SUM(o_orderpriority = '3-MEDIUM') AS sum_expr_22 - FROM tpch.orders - GROUP BY - 1 -), _s5 AS ( - SELECT - nation.n_regionkey, - COUNT(*) AS n_rows, - SUM(_s3.n_rows) AS sum_n_rows, - SUM(_s3.sum_expr) AS sum_sum_expr, - SUM(_s3.sum_expr_21) AS sum_sum_expr_21, - SUM(_s3.sum_expr_22) AS sum_sum_expr_22 - FROM tpch.nation AS nation - JOIN tpch.customer AS customer - ON customer.c_nationkey = nation.n_nationkey - LEFT JOIN _s3 AS _s3 - ON _s3.o_custkey = customer.c_custkey - GROUP BY - 1 -) -SELECT - region.r_name AS region_name, - _s5.n_rows AS n1, - COALESCE(_s5.sum_n_rows, 0) AS n2, - COALESCE(_s5.sum_sum_expr, 0) AS n3, - 
COALESCE(_s5.sum_sum_expr_21, 0) AS n4, - COALESCE(_s5.sum_sum_expr_22, 0) AS n5 -FROM tpch.region AS region -JOIN _s5 AS _s5 - ON _s5.n_regionkey = region.r_regionkey From 078347ea8b14daa8740b15fefb056caffb137343 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 30 Jan 2026 13:35:57 -0800 Subject: [PATCH 13/22] Adding functionality to work with HAS children --- pydough/conversion/hybrid_filter_merger.py | 21 ++++++++++++++++++- .../count_multiple_filters_c.txt | 11 ++++------ .../count_multiple_filters_d.txt | 11 ++++------ 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index b6280b219..ec0c17f94 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -113,6 +113,24 @@ def merge_filters(self, tree: HybridTree) -> None: tree.children[source_idx].min_steps, ) + # Add a new filter for the extra conditions from the source + # subtree if it was an ONLY_MATCH, checking whether the SUM + # is not zero, indicating that there was a match. 
+ if ( + tree.children[source_idx].connection_type + == ConnectionType.AGGREGATION_ONLY_MATCH + ): + tree.add_operation( + HybridFilter( + tree.pipeline[-1], + HybridFunctionExpr( + pydop.NEQ, + [agg_ref, HybridLiteralExpr(Literal(0, NumericType()))], + BooleanType(), + ), + ) + ) + # TODO ADD COMMENT for operation in tree.pipeline: operation.replace_expressions(replacement_map) @@ -134,7 +152,8 @@ def identify_mergeable_children(self, tree: HybridTree) -> set[int]: idx for idx, child in enumerate(tree.children) if ( - child.connection_type == ConnectionType.AGGREGATION + child.connection_type + in (ConnectionType.AGGREGATION, ConnectionType.AGGREGATION_ONLY_MATCH) and {repr(v) for v in child.aggs.values()} == {"COUNT()"} ) } diff --git a/tests/test_plan_refsols/count_multiple_filters_c.txt b/tests/test_plan_refsols/count_multiple_filters_c.txt index 7fd785d58..21535cfa1 100644 --- a/tests/test_plan_refsols/count_multiple_filters_c.txt +++ b/tests/test_plan_refsols/count_multiple_filters_c.txt @@ -1,8 +1,5 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n2': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr)], orderings=[]) + FILTER(condition=sum_expr != 0:numeric, columns={'n_rows': n_rows, 'sum_expr': sum_expr}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string)}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_d.txt b/tests/test_plan_refsols/count_multiple_filters_d.txt index 7fd785d58..21535cfa1 100644 --- a/tests/test_plan_refsols/count_multiple_filters_d.txt +++ b/tests/test_plan_refsols/count_multiple_filters_d.txt @@ -1,8 +1,5 @@ -ROOT(columns=[('n1', n_rows), ('n2', n2)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n2': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={}) +ROOT(columns=[('n1', n_rows), ('n2', sum_expr)], orderings=[]) + FILTER(condition=sum_expr != 0:numeric, columns={'n_rows': n_rows, 'sum_expr': sum_expr}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string)}) + FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) From bcf7fa784a9751be7bcc35f1f4b5881e7982c16d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 30 Jan 2026 14:42:22 -0800 Subject: [PATCH 14/22] Adding partial disjoint merge handling --- pydough/conversion/hybrid_filter_merger.py | 249 +++++++++++++----- tests/test_pipeline_tpch_custom.py | 126 ++++++++- .../count_multiple_filters_g.txt | 38 ++- .../count_multiple_filters_j.txt | 26 +- .../count_multiple_filters_k.txt | 28 +- .../count_multiple_filters_l.txt | 26 +- .../count_multiple_filters_m.txt | 28 +- .../count_multiple_filters_n.txt | 28 +- .../count_multiple_filters_o.txt | 28 +- 
.../count_multiple_filters_p.txt | 28 +- .../count_multiple_filters_q.txt | 7 + .../count_multiple_filters_r.txt | 8 + .../count_multiple_filters_s.txt | 8 + .../count_multiple_filters_t.txt | 8 + .../count_multiple_filters_u.txt | 8 + 15 files changed, 435 insertions(+), 209 deletions(-) create mode 100644 tests/test_plan_refsols/count_multiple_filters_q.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_r.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_s.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_t.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_u.txt diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index ec0c17f94..750acca9a 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -42,6 +42,9 @@ def merge_filters(self, tree: HybridTree) -> None: """ TODO """ + # Keep a set of all children that are marked for certain deletion. + must_delete: set[int] = set() + # Run the main procedure on subtrees with multiple children. 
if len(tree.children) > 1: # Identify which children are only used by a COUNT aggregation that is @@ -66,76 +69,38 @@ def merge_filters(self, tree: HybridTree) -> None: for source_idx, target_idx in enumerate(filter_dag): if target_idx is None: continue - extra_filters: set[HybridExpr] = ( + extra_source_filters: set[HybridExpr] = ( child_filters[source_idx] - child_filters[target_idx] ) - assert len(extra_filters) > 0 - new_cond: HybridExpr - if len(extra_filters) == 1: - new_cond = next(iter(extra_filters)) - else: - new_cond = HybridFunctionExpr( - pydop.BAN, - sorted(extra_filters, key=repr), - BooleanType(), - ) - numeric_expr: HybridExpr = HybridFunctionExpr( - pydop.IFF, - [ - new_cond, - HybridLiteralExpr(Literal(1, NumericType())), - HybridLiteralExpr(Literal(0, NumericType())), - ], - NumericType(), - ) - sum_expr: HybridFunctionExpr = HybridFunctionExpr( - pydop.SUM, - [numeric_expr], - NumericType(), - ) - agg_name: str = self.translator.gen_agg_name(tree.children[target_idx]) - tree.children[target_idx].aggs[agg_name] = sum_expr - agg_ref: HybridExpr = HybridChildRefExpr( - agg_name, target_idx, NumericType() - ) - old_agg_ref = HybridChildRefExpr( - next(iter(tree.children[source_idx].aggs)), - source_idx, - NumericType(), + extra_target_filters: set[HybridExpr] = ( + child_filters[target_idx] - child_filters[source_idx] ) - replacement_map[old_agg_ref] = agg_ref - tree.children[target_idx].max_steps = min( - tree.children[target_idx].max_steps, - tree.children[source_idx].max_steps, - ) - tree.children[target_idx].min_steps = min( - tree.children[target_idx].min_steps, - tree.children[source_idx].min_steps, - ) - - # Add a new filter for the extra conditions from the source - # subtree if it was an ONLY_MATCH, checking whether the SUM - # is not zero, indicating that there was a match. 
- if ( - tree.children[source_idx].connection_type - == ConnectionType.AGGREGATION_ONLY_MATCH - ): - tree.add_operation( - HybridFilter( - tree.pipeline[-1], - HybridFunctionExpr( - pydop.NEQ, - [agg_ref, HybridLiteralExpr(Literal(0, NumericType()))], - BooleanType(), - ), - ) + assert len(extra_source_filters) > 0 + if len(extra_target_filters) == 0: + self.merge_subset_filters( + tree, + source_idx, + target_idx, + extra_source_filters, + replacement_map, + must_delete, + ) + else: + self.merge_partial_disjoint_filters( + tree, + source_idx, + target_idx, + extra_source_filters, + extra_target_filters, + replacement_map, + must_delete, ) # TODO ADD COMMENT for operation in tree.pipeline: operation.replace_expressions(replacement_map) - tree.remove_dead_children(set()) + tree.remove_dead_children(must_delete) # Run the procedure recursively on the parent tree and the child # subtrees. @@ -144,6 +109,146 @@ def merge_filters(self, tree: HybridTree) -> None: for child in tree.children: self.merge_filters(child.subtree) + def merge_subset_filters( + self, + tree: HybridTree, + source_idx: int, + target_idx: int, + extra_source_filters: set[HybridExpr], + replacement_map: dict[HybridExpr, HybridExpr], + must_delete: set[int], + ) -> None: + """ + TODO + """ + new_cond: HybridExpr + if len(extra_source_filters) == 1: + new_cond = next(iter(extra_source_filters)) + else: + new_cond = HybridFunctionExpr( + pydop.BAN, + sorted(extra_source_filters, key=repr), + BooleanType(), + ) + numeric_expr: HybridExpr = HybridFunctionExpr( + pydop.IFF, + [ + new_cond, + HybridLiteralExpr(Literal(1, NumericType())), + HybridLiteralExpr(Literal(0, NumericType())), + ], + NumericType(), + ) + sum_expr: HybridFunctionExpr = HybridFunctionExpr( + pydop.SUM, + [numeric_expr], + NumericType(), + ) + agg_name: str = self.translator.gen_agg_name(tree.children[target_idx]) + tree.children[target_idx].aggs[agg_name] = sum_expr + agg_ref: HybridExpr = HybridChildRefExpr(agg_name, target_idx, 
NumericType()) + old_agg_ref = HybridChildRefExpr( + next( + name + for name, expr in tree.children[source_idx].aggs.items() + if repr(expr) == "COUNT()" + ), + source_idx, + NumericType(), + ) + replacement_map[old_agg_ref] = agg_ref + tree.children[target_idx].max_steps = min( + tree.children[target_idx].max_steps, + tree.children[source_idx].max_steps, + ) + tree.children[target_idx].min_steps = min( + tree.children[target_idx].min_steps, + tree.children[source_idx].min_steps, + ) + + # Add a new filter for the extra conditions from the source + # subtree if it was an ONLY_MATCH, checking whether the SUM + # is not zero, indicating that there was a match. + if ( + tree.children[source_idx].connection_type + == ConnectionType.AGGREGATION_ONLY_MATCH + ): + tree.add_operation( + HybridFilter( + tree.pipeline[-1], + HybridFunctionExpr( + pydop.NEQ, + [agg_ref, HybridLiteralExpr(Literal(0, NumericType()))], + BooleanType(), + ), + ) + ) + + if source_idx != target_idx: + must_delete.add(source_idx) + + def merge_partial_disjoint_filters( + self, + tree: HybridTree, + source_idx: int, + target_idx: int, + extra_source_filters: set[HybridExpr], + extra_target_filters: set[HybridExpr], + replacement_map: dict[HybridExpr, HybridExpr], + must_delete: set[int], + ) -> None: + """ + TODO + """ + # TODO ADD COMMENTS + self.merge_subset_filters( + tree, + source_idx, + target_idx, + extra_source_filters, + replacement_map, + must_delete, + ) + self.merge_subset_filters( + tree, + target_idx, + target_idx, + extra_target_filters, + replacement_map, + must_delete, + ) + + new_cond: HybridExpr + if len(extra_source_filters) == 1: + new_cond = next(iter(extra_source_filters)) + else: + new_cond = HybridFunctionExpr( + pydop.BAN, + sorted(extra_source_filters, key=repr), + BooleanType(), + ) + + # Now go back through the target subtree, find any existing filters + # after any window/limit, and make them a disjunction of the existing + # filter and the new filter conditions. 
+ for operation in reversed(tree.children[target_idx].subtree.pipeline): + if isinstance(operation, HybridFilter): + if operation.condition.contains_window_functions(): + break + operation.condition = HybridFunctionExpr( + pydop.BOR, + [operation.condition, new_cond], + BooleanType(), + ) + elif isinstance(operation, HybridLimit): + break + elif isinstance(operation, HybridCalculate): + if any( + expr.contains_window_functions() + for expr in operation.new_expressions.values() + ): + break + def identify_mergeable_children(self, tree: HybridTree) -> set[int]: """ TODO @@ -224,11 +329,31 @@ def make_filter_dag( TODO """ dag: list[int | None] = [None for _ in range(len(child_filters))] + # Build initial edges from each mergeable child to another isomorphic + # child that is a subset of its filter list. for idx in mergeable_children: - for other_idx in child_isomorphisms[idx]: + for other_idx in sorted(child_isomorphisms[idx]): if child_filters[other_idx] < child_filters[idx]: dag[idx] = other_idx break + + # Form secondary edges between island nodes that are not subsets of + # one another but where both of them are mergeable, and neither one is + # the sink of an edge yet. 
+ existing_sinks: set[int | None] = set(dag) + for idx in mergeable_children: + for other_idx in sorted(child_isomorphisms[idx]): + if ( + other_idx in mergeable_children + and dag[idx] is None + and dag[other_idx] is None + and idx not in existing_sinks + and other_idx not in existing_sinks + ): + dag[idx] = other_idx + break + + # Collapse transitive edges for idx in range(len(dag)): if dag[idx] is not None: while True: diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 7da160a51..6ea89e574 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2828,7 +2828,6 @@ "c4 = customers.WHERE(nation.name == 'CHINA').WHERE(PERCENTILE(by=account_balance.ASC()) == 100)\n" "c5 = customers.WHERE((PERCENTILE(by=account_balance.ASC()) == 100) & (nation.name == 'CHINA'))\n" "c6 = customers.WHERE(nation.name == 'CHINA')\n" - "c6 = customers.WHERE(nation.name == 'CHINA')\n" "result = TPCH.CALCULATE(" " n1=COUNT(c1), " " n2=COUNT(c2), " @@ -3139,6 +3138,131 @@ ), id="count_multiple_filters_p", ), + pytest.param( + PyDoughPandasTest( + "c1 = orders.WHERE(order_priority == '1-URGENT')\n" + "c2 = orders.WHERE(order_priority == '2-HIGH')\n" + "c3 = orders.WHERE(order_priority == '3-MEDIUM')\n" + "result = customers.WHERE(ISIN(key, [2, 3, 4, 17, 23, 26, 380, 827])).CALCULATE(" + " customer_key=key, " + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "customer_key": [2, 3, 4, 17, 23, 26, 380, 827], + "n1": [4, 0, 3, 0, 2, 0, 2, 0], + "n2": [1, 0, 5, 0, 0, 1, 0, 4], + "n3": [0, 0, 7, 2, 3, 2, 0, 0], + } + ), + "count_multiple_filters_q", + skip_sql=True, + ), + id="count_multiple_filters_q", + ), + pytest.param( + PyDoughPandasTest( + "c1 = orders.WHERE(order_priority == '1-URGENT')\n" + "c2 = orders.WHERE(order_priority == '2-HIGH')\n" + "c3 = orders.WHERE(order_priority == '3-MEDIUM')\n" + "result = customers.WHERE(ISIN(key, [2, 3, 4, 17, 23, 26, 380, 
827])).CALCULATE(" + " customer_key=key, " + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ").WHERE(HAS(c1))", + "TPCH", + lambda: pd.DataFrame( + { + "customer_key": [2, 4, 23, 380], + "n1": [4, 3, 2, 2], + "n2": [1, 5, 0, 0], + "n3": [0, 7, 3, 0], + } + ), + "count_multiple_filters_r", + skip_sql=True, + ), + id="count_multiple_filters_r", + ), + pytest.param( + PyDoughPandasTest( + "c1 = orders.WHERE(order_priority == '1-URGENT')\n" + "c2 = orders.WHERE(order_priority == '2-HIGH')\n" + "c3 = orders.WHERE(order_priority == '3-MEDIUM')\n" + "result = customers.WHERE(ISIN(key, [2, 3, 4, 17, 23, 26, 380, 827])).CALCULATE(" + " customer_key=key, " + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ").WHERE(HAS(c2))", + "TPCH", + lambda: pd.DataFrame( + { + "customer_key": [2, 4, 26, 827], + "n1": [4, 3, 0, 0], + "n2": [1, 5, 1, 4], + "n3": [0, 7, 2, 0], + } + ), + "count_multiple_filters_s", + skip_sql=True, + ), + id="count_multiple_filters_s", + ), + pytest.param( + PyDoughPandasTest( + "c1 = orders.WHERE(order_priority == '1-URGENT')\n" + "c2 = orders.WHERE(order_priority == '2-HIGH')\n" + "c3 = orders.WHERE(order_priority == '3-MEDIUM')\n" + "result = customers.WHERE(ISIN(key, [2, 3, 4, 17, 23, 26, 380, 827])).CALCULATE(" + " customer_key=key, " + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ").WHERE(HAS(c3))", + "TPCH", + lambda: pd.DataFrame( + { + "customer_key": [4, 17, 23, 26], + "n1": [3, 0, 2, 0], + "n2": [5, 0, 0, 1], + "n3": [7, 2, 3, 2], + } + ), + "count_multiple_filters_t", + skip_sql=True, + ), + id="count_multiple_filters_t", + ), + pytest.param( + PyDoughPandasTest( + "c1 = orders.WHERE(order_priority == '1-URGENT')\n" + "c2 = orders.WHERE(order_priority == '2-HIGH')\n" + "c3 = orders.WHERE(order_priority == '3-MEDIUM')\n" + "result = customers.WHERE(ISIN(key, [2, 3, 4, 17, 23, 26, 380, 827])).CALCULATE(" + " customer_key=key, " + " n1=COUNT(c1), " + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ").WHERE(HAS(c1) & 
HAS(c2) & HAS(c3))", + "TPCH", + lambda: pd.DataFrame( + { + "customer_key": [4], + "n1": [3], + "n2": [5], + "n3": [7], + } + ), + "count_multiple_filters_u", + skip_sql=True, + ), + id="count_multiple_filters_u", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/count_multiple_filters_g.txt b/tests/test_plan_refsols/count_multiple_filters_g.txt index b80d0ef1c..4ab4fbe1a 100644 --- a/tests/test_plan_refsols/count_multiple_filters_g.txt +++ b/tests/test_plan_refsols/count_multiple_filters_g.txt @@ -1,23 +1,17 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ('n5', agg_4), ('n6', agg_5)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'agg_5': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_agg_1': t0.sum_agg_1}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_agg_1': t1.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - 
FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', agg_3), ('n5', agg_4), ('n6', sum_expr_9)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'n_rows': t0.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_9': t1.sum_expr_9}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + SCAN(table=tpch.CUSTOMER, 
columns={'c_acctbal': c_acctbal}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) @@ -29,8 +23,8 @@ ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_agg_1), ('n4', agg_3), ( JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(n_name == 'GERMANY':string), 'sum_expr_9': SUM(n_name == 'CHINA':string)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'CHINA':string, 
columns={'n_nationkey': n_nationkey}) + FILTER(condition=n_name == 'CHINA':string | n_name == 'GERMANY':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_j.txt b/tests/test_plan_refsols/count_multiple_filters_j.txt index 0778fc83d..91b2ee95a 100644 --- a/tests/test_plan_refsols/count_multiple_filters_j.txt +++ b/tests/test_plan_refsols/count_multiple_filters_j.txt @@ -1,19 +1,11 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr_5, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - 
JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_k.txt b/tests/test_plan_refsols/count_multiple_filters_k.txt index e0c244bd8..21042aa6f 100644 --- a/tests/test_plan_refsols/count_multiple_filters_k.txt +++ b/tests/test_plan_refsols/count_multiple_filters_k.txt @@ -1,20 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 
'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_expr_5 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) + FILTER(condition=sum_sum_expr != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_l.txt b/tests/test_plan_refsols/count_multiple_filters_l.txt index 0f36d4f4b..65a513867 100644 --- 
a/tests/test_plan_refsols/count_multiple_filters_l.txt +++ b/tests/test_plan_refsols/count_multiple_filters_l.txt @@ -1,19 +1,11 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr_5, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 
'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_m.txt b/tests/test_plan_refsols/count_multiple_filters_m.txt index 8e90879f9..755afcac1 100644 --- a/tests/test_plan_refsols/count_multiple_filters_m.txt +++ b/tests/test_plan_refsols/count_multiple_filters_m.txt @@ -1,20 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': 
t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_expr_5 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) - 
SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + FILTER(condition=sum_sum_expr != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': 
o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_n.txt b/tests/test_plan_refsols/count_multiple_filters_n.txt index 4d6be32e7..e2a8fc9e1 100644 --- a/tests/test_plan_refsols/count_multiple_filters_n.txt +++ b/tests/test_plan_refsols/count_multiple_filters_n.txt @@ -1,20 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_8), ('n4', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', sum_sum_expr_19), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_expr_5 != 0:numeric & sum_expr_8 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_19 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': 
SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_o.txt b/tests/test_plan_refsols/count_multiple_filters_o.txt index c3406625f..00a2a6983 100644 --- a/tests/test_plan_refsols/count_multiple_filters_o.txt +++ b/tests/test_plan_refsols/count_multiple_filters_o.txt @@ -1,20 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n4', sum_n_rows)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', sum_sum_expr_20)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_expr_5 != 0:numeric & sum_n_rows != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) 
- SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_20 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, 
aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_p.txt b/tests/test_plan_refsols/count_multiple_filters_p.txt index 8a1408be8..f6735c444 100644 --- a/tests/test_plan_refsols/count_multiple_filters_p.txt +++ b/tests/test_plan_refsols/count_multiple_filters_p.txt @@ -1,20 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_8), ('n4', sum_n_rows)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_expr, 'sum_expr_8': t1.sum_expr_8, 'sum_n_rows': t1.sum_n_rows}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', sum_sum_expr_19), ('n4', sum_sum_expr_20)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_expr_5 != 0:numeric & sum_expr_8 != 0:numeric & 
sum_n_rows != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_expr': sum_expr_5, 'sum_expr_8': sum_expr_8, 'sum_n_rows': sum_n_rows}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_expr_5': SUM(expr_5), 'sum_expr_8': SUM(expr_8), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'expr_5': t0.expr_5, 'expr_8': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'expr_5': t0.n_rows, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) - SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) - 
AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey}) + FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_19 != 0:numeric & sum_sum_expr_20 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, 
columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_q.txt b/tests/test_plan_refsols/count_multiple_filters_q.txt new file mode 100644 index 000000000..9fd0e560f --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_q.txt @@ -0,0 +1,7 @@ +ROOT(columns=[('customer_key', c_custkey), ('n1', DEFAULT_TO(sum_expr, 0:numeric)), ('n2', DEFAULT_TO(sum_expr_9, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_10, 0:numeric))], orderings=[]) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_orderpriority': t1.o_orderpriority}) + FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_r.txt b/tests/test_plan_refsols/count_multiple_filters_r.txt new file mode 100644 index 000000000..e98e49fc6 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_r.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': 
t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) + FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=sum_expr != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_s.txt b/tests/test_plan_refsols/count_multiple_filters_s.txt new file mode 100644 index 000000000..3dc165039 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_s.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) + FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=sum_expr_9 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': 
SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_t.txt b/tests/test_plan_refsols/count_multiple_filters_t.txt new file mode 100644 index 000000000..7730768d5 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_t.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) + FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=sum_expr_10 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_u.txt b/tests/test_plan_refsols/count_multiple_filters_u.txt 
new file mode 100644 index 000000000..2599b1b79 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_u.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) + FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) + FILTER(condition=sum_expr != 0:numeric & sum_expr_10 != 0:numeric & sum_expr_9 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) From 9012d0f8fe017ef55f3e4540c4a088c738c1c89d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 30 Jan 2026 14:55:27 -0800 Subject: [PATCH 15/22] Updating restaurant_gen14 --- .../defog_test_functions.py | 2 +- .../defog_restaurants_gen14_ansi.sql | 15 +++++++-------- .../defog_restaurants_gen14_mysql.sql | 15 +++++++-------- .../defog_restaurants_gen14_postgres.sql | 15 +++++++-------- .../defog_restaurants_gen14_snowflake.sql | 15 +++++++-------- .../defog_restaurants_gen14_sqlite.sql | 15 +++++++-------- 6 files changed, 36 insertions(+), 41 deletions(-) diff --git 
a/tests/test_pydough_functions/defog_test_functions.py b/tests/test_pydough_functions/defog_test_functions.py index 600b4700c..a737977b2 100644 --- a/tests/test_pydough_functions/defog_test_functions.py +++ b/tests/test_pydough_functions/defog_test_functions.py @@ -2968,7 +2968,7 @@ def impl_defog_restaurants_gen14(): """ sf_restaurants = restaurants.WHERE(LOWER(city_name) == "san francisco") n_vegan = COUNT(sf_restaurants.WHERE(LOWER(food_type) == "vegan")) - n_non_vegan = COUNT(sf_restaurants) - n_vegan + n_non_vegan = COUNT(sf_restaurants.WHERE(LOWER(food_type) != "vegan")) return Restaurants.CALCULATE( ratio=(n_vegan / KEEP_IF(n_non_vegan, n_non_vegan != 0)) ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql index 586affd62..0bc5f4bfb 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql @@ -1,11 +1,10 @@ SELECT - SUM(LOWER(food_type) = 'vegan') / CASE - WHEN ( - COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ) <> 0 - THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ELSE NULL - END AS ratio + SUM(LOWER(food_type) = 'vegan') / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' + ( + LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' + ) + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql index 586affd62..0bc5f4bfb 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql @@ -1,11 +1,10 @@ SELECT - SUM(LOWER(food_type) = 'vegan') / CASE - WHEN ( - COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ) <> 0 - THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ELSE NULL - END AS ratio + SUM(LOWER(food_type) = 'vegan') / 
NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' + ( + LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' + ) + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql index 9d98b0324..5b65bdcb0 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql @@ -1,11 +1,10 @@ SELECT - CAST(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / CASE - WHEN ( - COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) - ) <> 0 - THEN COUNT(*) - SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) - ELSE NULL - END AS ratio + CAST(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' + ( + LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' + ) + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql index cfee256b5..1de217128 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql @@ -1,11 +1,10 @@ SELECT - COUNT_IF(LOWER(food_type) = 'vegan') / CASE - WHEN ( - COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') - ) <> 0 - THEN COUNT(*) - COUNT_IF(LOWER(food_type) = 'vegan') - ELSE NULL - END AS ratio + COUNT_IF(LOWER(food_type) = 'vegan') / NULLIF(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' + ( + LOWER(city_name) = 'san francisco' OR 
LOWER(food_type) = 'vegan' + ) + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index a4f7ac562..f17affb11 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -1,11 +1,10 @@ SELECT - CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / CASE - WHEN ( - COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ) <> 0 - THEN COUNT(*) - SUM(LOWER(food_type) = 'vegan') - ELSE NULL - END AS ratio + CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' + ( + LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' + ) + AND ( + LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' + ) From 0d77988a4f7d2cbba0245ac37b4c526879f26f21 Mon Sep 17 00:00:00 2001 From: Hadia Ahmed Date: Fri, 6 Feb 2026 13:59:21 -0800 Subject: [PATCH 16/22] [run all] address comments --- tests/test_pipeline_tpch_custom.py | 50 ++++++++++++------- .../has_cross_correlated.txt | 5 ++ .../has_cross_correlated_singular.txt | 5 ++ .../redundant_has_not_on_singular.txt | 8 +++ .../has_cross_correlated_ansi.sql | 5 ++ .../has_cross_correlated_mysql.sql | 5 ++ .../has_cross_correlated_postgres.sql | 5 ++ .../has_cross_correlated_singular_ansi.sql | 5 ++ .../has_cross_correlated_singular_mysql.sql | 14 ++++++ ...has_cross_correlated_singular_postgres.sql | 14 ++++++ ...as_cross_correlated_singular_snowflake.sql | 14 ++++++ .../has_cross_correlated_singular_sqlite.sql | 14 ++++++ .../has_cross_correlated_snowflake.sql | 5 ++ .../has_cross_correlated_sqlite.sql | 5 ++ .../redundant_has_not_on_singular_ansi.sql | 7 +++ .../redundant_has_not_on_singular_mysql.sql | 16 ++++++ ...redundant_has_not_on_singular_postgres.sql | 16 ++++++ 
...edundant_has_not_on_singular_snowflake.sql | 16 ++++++ .../redundant_has_not_on_singular_sqlite.sql | 16 ++++++ 19 files changed, 206 insertions(+), 19 deletions(-) create mode 100644 tests/test_plan_refsols/has_cross_correlated.txt create mode 100644 tests/test_plan_refsols/has_cross_correlated_singular.txt create mode 100644 tests/test_plan_refsols/redundant_has_not_on_singular.txt create mode 100644 tests/test_sql_refsols/has_cross_correlated_ansi.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_mysql.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_postgres.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_singular_ansi.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_singular_mysql.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_singular_postgres.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_singular_snowflake.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_singular_sqlite.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_snowflake.sql create mode 100644 tests/test_sql_refsols/has_cross_correlated_sqlite.sql create mode 100644 tests/test_sql_refsols/redundant_has_not_on_singular_ansi.sql create mode 100644 tests/test_sql_refsols/redundant_has_not_on_singular_mysql.sql create mode 100644 tests/test_sql_refsols/redundant_has_not_on_singular_postgres.sql create mode 100644 tests/test_sql_refsols/redundant_has_not_on_singular_snowflake.sql create mode 100644 tests/test_sql_refsols/redundant_has_not_on_singular_sqlite.sql diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 6caac393e..0b3ad655f 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -2741,7 +2741,8 @@ ), pytest.param( PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation.WHERE(region.name == 'ASIA')))))", + "asian_nations = nation.WHERE(region.name == 
'ASIA')\n" + "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(asian_nations))))", "TPCH", lambda: pd.DataFrame( { @@ -2755,7 +2756,9 @@ # Nested HAS on singular chain (supplier -> nation -> region), both should optimize to INNER pytest.param( PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(HAS(region.WHERE(name == 'AFRICA')))))))", + "african_regions = region.WHERE(name == 'AFRICA')\n" + "african_nations = nation.WHERE(HAS(african_regions))\n" + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(african_nations))))", "TPCH", lambda: pd.DataFrame( { @@ -2783,7 +2786,8 @@ # HAS on singular relationship with additional filter pytest.param( PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(nation.WHERE(region.name == 'EUROPE')))))", + "european_nations = nation.WHERE(region.name == 'EUROPE')\n" + "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HAS(european_nations))))", "TPCH", lambda: pd.DataFrame( { @@ -2808,7 +2812,7 @@ ), id="redundant_has_on_plural_lineitems", ), - # HASNOT on singular relationship - should optimize to ANTI join or similar + # No optimization , stay as ANTI. pytest.param( PyDoughPandasTest( "result = TPCH.CALCULATE(n=COUNT(suppliers.WHERE(HASNOT(nation.WHERE(region.name == 'AFRICA')))))", @@ -2819,42 +2823,50 @@ } ), "redundant_has_not_on_singular", - skip_relational=True, - skip_sql=True, ), id="redundant_has_not_on_singular", ), - # HAS without WHERE filter on singular - should optimize to INNER + # HAS containing CROSS with correlated filter back to outer context. + # Customers who have a supplier from their same nation (via CROSS). + # Should NOT optimize since CROSS creates a plural relationship. 
pytest.param( PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(customers.WHERE(HAS(nation))))", + "selected = customers.CALCULATE(my_nation_key=nation.key)\n" + "result = TPCH.CALCULATE(\n" + " n=COUNT(selected.WHERE(HAS(\n" + " CROSS(suppliers).WHERE(nation.key == my_nation_key)\n" + " )))\n" + ")", "TPCH", lambda: pd.DataFrame( { "n": [150000], } ), - "redundant_has_no_filter_singular", - skip_relational=True, - skip_sql=True, + "has_cross_correlated", ), - id="redundant_has_no_filter_singular", + id="has_cross_correlated", ), - # HAS on singular within plural context - orders whose customer is from ASIA + # HAS containing CROSS with correlated filter and SINGULAR. + # The filter ensures exactly one match per row, SINGULAR enforces it. + # Optimizes to INNER JOIN since SINGULAR makes the relationship singular. pytest.param( PyDoughPandasTest( - "result = TPCH.CALCULATE(n=COUNT(orders.WHERE(HAS(customer.WHERE(nation.region.name == 'ASIA')))))", + "selected = customers.CALCULATE(my_nation_key=nation.key)\n" + "result = TPCH.CALCULATE(\n" + " n=COUNT(selected.WHERE(HAS(\n" + " CROSS(nations).WHERE(key == my_nation_key).SINGULAR()\n" + " )))\n" + ")", "TPCH", lambda: pd.DataFrame( { - "n": [301740], + "n": [150000], } ), - "redundant_has_singular_in_plural_context", - skip_relational=True, - skip_sql=True, + "has_cross_correlated_singular", ), - id="redundant_has_singular_in_plural_context", + id="has_cross_correlated_singular", ), pytest.param( PyDoughPandasTest( diff --git a/tests/test_plan_refsols/has_cross_correlated.txt b/tests/test_plan_refsols/has_cross_correlated.txt new file mode 100644 index 000000000..58277ebde --- /dev/null +++ b/tests/test_plan_refsols/has_cross_correlated.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('n', ndistinct_c_custkey)], orderings=[]) + AGGREGATE(keys={}, aggregations={'ndistinct_c_custkey': NDISTINCT(c_custkey)}) + JOIN(condition=t1.s_nationkey == t0.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/has_cross_correlated_singular.txt b/tests/test_plan_refsols/has_cross_correlated_singular.txt new file mode 100644 index 000000000..52f473208 --- /dev/null +++ b/tests/test_plan_refsols/has_cross_correlated_singular.txt @@ -0,0 +1,5 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=SEMI, columns={}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/redundant_has_not_on_singular.txt b/tests/test_plan_refsols/redundant_has_not_on_singular.txt new file mode 100644 index 000000000..a24d4e790 --- /dev/null +++ b/tests/test_plan_refsols/redundant_has_not_on_singular.txt @@ -0,0 +1,8 @@ +ROOT(columns=[('n', n_rows)], orderings=[]) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=ANTI, columns={}) + SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) + JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=r_name == 'AFRICA':string, columns={'r_regionkey': r_regionkey}) + SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_sql_refsols/has_cross_correlated_ansi.sql b/tests/test_sql_refsols/has_cross_correlated_ansi.sql new file mode 100644 index 000000000..1f66f7742 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_ansi.sql @@ -0,0 
+1,5 @@ +SELECT + COUNT(DISTINCT customer.c_custkey) AS n +FROM tpch.customer AS customer +JOIN tpch.supplier AS supplier + ON customer.c_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/has_cross_correlated_mysql.sql b/tests/test_sql_refsols/has_cross_correlated_mysql.sql new file mode 100644 index 000000000..25c69523c --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_mysql.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT CUSTOMER.c_custkey) AS n +FROM tpch.CUSTOMER AS CUSTOMER +JOIN tpch.SUPPLIER AS SUPPLIER + ON CUSTOMER.c_nationkey = SUPPLIER.s_nationkey diff --git a/tests/test_sql_refsols/has_cross_correlated_postgres.sql b/tests/test_sql_refsols/has_cross_correlated_postgres.sql new file mode 100644 index 000000000..1f66f7742 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_postgres.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT customer.c_custkey) AS n +FROM tpch.customer AS customer +JOIN tpch.supplier AS supplier + ON customer.c_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/has_cross_correlated_singular_ansi.sql b/tests/test_sql_refsols/has_cross_correlated_singular_ansi.sql new file mode 100644 index 000000000..edf6ee242 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_ansi.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/has_cross_correlated_singular_mysql.sql b/tests/test_sql_refsols/has_cross_correlated_singular_mysql.sql new file mode 100644 index 000000000..157fd0cab --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_mysql.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + n_nationkey AS _u_1 + FROM tpch.NATION + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.CUSTOMER AS CUSTOMER +LEFT JOIN _u_0 AS _u_0 + ON CUSTOMER.c_nationkey = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git 
a/tests/test_sql_refsols/has_cross_correlated_singular_postgres.sql b/tests/test_sql_refsols/has_cross_correlated_singular_postgres.sql new file mode 100644 index 000000000..afc70b571 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_postgres.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + n_nationkey AS _u_1 + FROM tpch.nation + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_nationkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/has_cross_correlated_singular_snowflake.sql b/tests/test_sql_refsols/has_cross_correlated_singular_snowflake.sql new file mode 100644 index 000000000..afc70b571 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_snowflake.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + n_nationkey AS _u_1 + FROM tpch.nation + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_nationkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/has_cross_correlated_singular_sqlite.sql b/tests/test_sql_refsols/has_cross_correlated_singular_sqlite.sql new file mode 100644 index 000000000..afc70b571 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_singular_sqlite.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + n_nationkey AS _u_1 + FROM tpch.nation + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.customer AS customer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customer.c_nationkey +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/has_cross_correlated_snowflake.sql b/tests/test_sql_refsols/has_cross_correlated_snowflake.sql new file mode 100644 index 000000000..1f66f7742 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT customer.c_custkey) AS n +FROM tpch.customer AS customer +JOIN tpch.supplier AS supplier + ON customer.c_nationkey = 
supplier.s_nationkey diff --git a/tests/test_sql_refsols/has_cross_correlated_sqlite.sql b/tests/test_sql_refsols/has_cross_correlated_sqlite.sql new file mode 100644 index 000000000..1f66f7742 --- /dev/null +++ b/tests/test_sql_refsols/has_cross_correlated_sqlite.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(DISTINCT customer.c_custkey) AS n +FROM tpch.customer AS customer +JOIN tpch.supplier AS supplier + ON customer.c_nationkey = supplier.s_nationkey diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_ansi.sql b/tests/test_sql_refsols/redundant_has_not_on_singular_ansi.sql new file mode 100644 index 000000000..666260dff --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_ansi.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_mysql.sql b/tests/test_sql_refsols/redundant_has_not_on_singular_mysql.sql new file mode 100644 index 000000000..f7a716c60 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_mysql.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + NATION.n_nationkey AS _u_1 + FROM tpch.NATION AS NATION + JOIN tpch.REGION AS REGION + ON NATION.n_regionkey = REGION.r_regionkey AND REGION.r_name = 'AFRICA' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.SUPPLIER AS SUPPLIER +LEFT JOIN _u_0 AS _u_0 + ON SUPPLIER.s_nationkey = _u_0._u_1 +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_postgres.sql b/tests/test_sql_refsols/redundant_has_not_on_singular_postgres.sql new file mode 100644 index 000000000..431b9ed82 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_postgres.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + nation.n_nationkey AS _u_1 + FROM tpch.nation AS nation + JOIN 
tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = supplier.s_nationkey +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_snowflake.sql b/tests/test_sql_refsols/redundant_has_not_on_singular_snowflake.sql new file mode 100644 index 000000000..431b9ed82 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + nation.n_nationkey AS _u_1 + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = supplier.s_nationkey +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/redundant_has_not_on_singular_sqlite.sql b/tests/test_sql_refsols/redundant_has_not_on_singular_sqlite.sql new file mode 100644 index 000000000..431b9ed82 --- /dev/null +++ b/tests/test_sql_refsols/redundant_has_not_on_singular_sqlite.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + nation.n_nationkey AS _u_1 + FROM tpch.nation AS nation + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AFRICA' + GROUP BY + 1 +) +SELECT + COUNT(*) AS n +FROM tpch.supplier AS supplier +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = supplier.s_nationkey +WHERE + _u_0._u_1 IS NULL From bb494978fb2cc071b1fab91638ba9c696c754895 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Fri, 6 Feb 2026 14:37:58 -0800 Subject: [PATCH 17/22] Fixing disjunction issues and new tests --- pydough/conversion/hybrid_filter_merger.py | 146 ++++++++++++------ tests/test_pipeline_tpch_custom.py | 53 +++++++ tests/test_plan_refsols/common_prefix_d.txt | 28 ++-- .../count_multiple_filters_g.txt | 34 ++-- .../count_multiple_filters_j.txt | 12 +- 
.../count_multiple_filters_k.txt | 14 +- .../count_multiple_filters_l.txt | 12 +- .../count_multiple_filters_m.txt | 14 +- .../count_multiple_filters_n.txt | 14 +- .../count_multiple_filters_o.txt | 14 +- .../count_multiple_filters_p.txt | 14 +- .../count_multiple_filters_q.txt | 6 +- .../count_multiple_filters_r.txt | 10 +- .../count_multiple_filters_s.txt | 10 +- .../count_multiple_filters_t.txt | 10 +- .../count_multiple_filters_u.txt | 10 +- .../count_multiple_filters_v.txt | 4 + .../count_multiple_filters_w.txt | 4 + .../defog_restaurants_gen14_ansi.sql | 4 +- .../defog_restaurants_gen14_mysql.sql | 4 +- .../defog_restaurants_gen14_postgres.sql | 4 +- .../defog_restaurants_gen14_snowflake.sql | 4 +- .../defog_restaurants_gen14_sqlite.sql | 4 +- 23 files changed, 260 insertions(+), 169 deletions(-) create mode 100644 tests/test_plan_refsols/count_multiple_filters_v.txt create mode 100644 tests/test_plan_refsols/count_multiple_filters_w.txt diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 750acca9a..c780a1bb4 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -42,7 +42,7 @@ def merge_filters(self, tree: HybridTree) -> None: """ TODO """ - # Keep a set of all children that are marked for certain deletion. + # Keep a set of all children that are marked for certain deletion.\ must_delete: set[int] = set() # Run the main procedure on subtrees with multiple children. 
@@ -64,6 +64,11 @@ def merge_filters(self, tree: HybridTree) -> None: mergeable_children, child_filters, child_isomorphisms ) + # TODO ADD COMMENT + secondary_merges: dict[int, set[int]] = self.make_secondary_merges( + mergeable_children, child_isomorphisms, filter_dag + ) + # TODO ADD COMMENT replacement_map: dict[HybridExpr, HybridExpr] = {} for source_idx, target_idx in enumerate(filter_dag): @@ -75,26 +80,25 @@ def merge_filters(self, tree: HybridTree) -> None: extra_target_filters: set[HybridExpr] = ( child_filters[target_idx] - child_filters[source_idx] ) - assert len(extra_source_filters) > 0 - if len(extra_target_filters) == 0: - self.merge_subset_filters( - tree, - source_idx, - target_idx, - extra_source_filters, - replacement_map, - must_delete, - ) - else: - self.merge_partial_disjoint_filters( - tree, - source_idx, - target_idx, - extra_source_filters, - extra_target_filters, - replacement_map, - must_delete, - ) + assert len(extra_source_filters) > 0 and len(extra_target_filters) == 0 + self.merge_subset_filters( + tree, + source_idx, + target_idx, + extra_source_filters, + replacement_map, + must_delete, + ) + + for target_idx, source_idxs in secondary_merges.items(): + self.merge_partial_disjoint_filters( + tree, + target_idx, + source_idxs, + child_filters, + replacement_map, + must_delete, + ) # TODO ADD COMMENT for operation in tree.pipeline: @@ -190,10 +194,9 @@ def merge_subset_filters( def merge_partial_disjoint_filters( self, tree: HybridTree, - source_idx: int, target_idx: int, - extra_source_filters: set[HybridExpr], - extra_target_filters: set[HybridExpr], + source_idxs: set[int], + all_filters: list[set[HybridExpr]], replacement_map: dict[HybridExpr, HybridExpr], must_delete: set[int], ) -> None: @@ -201,14 +204,26 @@ def merge_partial_disjoint_filters( TODO """ # TODO ADD COMMENTS - self.merge_subset_filters( - tree, - source_idx, - target_idx, - extra_source_filters, - replacement_map, - must_delete, + intersection = set.intersection( + 
*(all_filters[source_idx] for source_idx in source_idxs), + all_filters[target_idx], ) + + for source_idx in sorted(source_idxs): + extra_source_filters: set[HybridExpr] = ( + all_filters[source_idx] - intersection + ) + self.merge_subset_filters( + tree, + source_idx, + target_idx, + extra_source_filters, + replacement_map, + must_delete, + ) + + # TODO ADD COMMENTS + extra_target_filters: set[HybridExpr] = all_filters[target_idx] - intersection self.merge_subset_filters( tree, target_idx, @@ -218,13 +233,29 @@ def merge_partial_disjoint_filters( must_delete, ) + # TODO: ADD COMMENTS + new_conds: list[HybridExpr] = [] + for source_idx in sorted(source_idxs): + source_filters: set[HybridExpr] = all_filters[source_idx] + source_cond: HybridExpr + if len(source_filters) == 1: + source_cond = next(iter(source_filters)) + else: + source_cond = HybridFunctionExpr( + pydop.BAN, + sorted(source_filters, key=repr), + BooleanType(), + ) + new_conds.append(source_cond) + + # TODO: ADD COMMENTS new_cond: HybridExpr - if len(extra_source_filters) == 1: - new_cond = next(iter(extra_source_filters)) + if len(new_conds) == 1: + new_cond = new_conds[0] else: new_cond = HybridFunctionExpr( - pydop.BAN, - sorted(extra_source_filters, key=repr), + pydop.BOR, + new_conds, BooleanType(), ) @@ -337,28 +368,45 @@ def make_filter_dag( dag[idx] = other_idx break + # Collapse transitive edges + for idx in range(len(dag)): + if dag[idx] is not None: + while True: + target_idx: int | None = dag[idx] + if target_idx is None or dag[target_idx] is None: + break + dag[idx] = dag[target_idx] + return dag + + def make_secondary_merges( + self, + mergeable_children: set[int], + child_isomorphisms: list[set[int]], + filter_dag: list[int | None], + ) -> dict[int, set[int]]: + """ + TODO + """ + secondary_merges: dict[int, set[int]] = {} + # Form secondary edges between island nodes that are not subsets of # one another but where both of them are mergeable, and neither one is # the sink of an edge yet. 
- existing_sinks: set[int | None] = set(dag) + existing_sinks: set[int | None] = set(filter_dag) + already_merged: set[int] = set() for idx in mergeable_children: for other_idx in sorted(child_isomorphisms[idx]): if ( other_idx in mergeable_children - and dag[idx] is None - and dag[other_idx] is None + and filter_dag[idx] is None + and filter_dag[other_idx] is None and idx not in existing_sinks and other_idx not in existing_sinks + and other_idx not in secondary_merges + and other_idx not in already_merged ): - dag[idx] = other_idx - break + secondary_merges[idx] = secondary_merges.get(idx, set()) + secondary_merges[idx].add(other_idx) + already_merged.add(other_idx) - # Collapse transitive edges - for idx in range(len(dag)): - if dag[idx] is not None: - while True: - target_idx: int | None = dag[idx] - if target_idx is None or dag[target_idx] is None: - break - dag[idx] = dag[target_idx] - return dag + return secondary_merges diff --git a/tests/test_pipeline_tpch_custom.py b/tests/test_pipeline_tpch_custom.py index 6ea89e574..7fad9cf0b 100644 --- a/tests/test_pipeline_tpch_custom.py +++ b/tests/test_pipeline_tpch_custom.py @@ -3263,6 +3263,59 @@ ), id="count_multiple_filters_u", ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE(market_segment == 'BUILDING')\n" + "c2 = c1.WHERE(MONOTONIC(500, account_balance, 1000))\n" + "c3 = c1.WHERE(~MONOTONIC(500, account_balance, 1000))\n" + "result = TPCH.CALCULATE(" + " n2=COUNT(c2), " + " n3=COUNT(c3), " + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n2": [1394], + "n3": [28748], + } + ), + "count_multiple_filters_v", + skip_sql=True, + ), + id="count_multiple_filters_v", + ), + pytest.param( + PyDoughPandasTest( + "c1 = customers.WHERE((market_segment == 'BUILDING') & STARTSWITH(phone, '30'))\n" + "c2 = customers.WHERE((market_segment == 'BUILDING') & STARTSWITH(phone, '31'))\n" + "c3 = customers.WHERE((market_segment == 'BUILDING') & STARTSWITH(phone, '32'))\n" + "c4 = customers.WHERE((market_segment == 
'HOUSEHOLD') & STARTSWITH(phone, '30'))\n" + "c5 = customers.WHERE((market_segment == 'HOUSEHOLD') & STARTSWITH(phone, '31'))\n" + "c6 = customers.WHERE((market_segment == 'HOUSEHOLD') & STARTSWITH(phone, '32'))\n" + "result = TPCH.CALCULATE(" + " n1=COUNT(c1)," + " n2=COUNT(c2)," + " n3=COUNT(c3)," + " n4=COUNT(c4)," + " n5=COUNT(c5)," + " n6=COUNT(c6)," + ")", + "TPCH", + lambda: pd.DataFrame( + { + "n1": [1182], + "n2": [1230], + "n3": [1207], + "n4": [1206], + "n5": [1215], + "n6": [1265], + } + ), + "count_multiple_filters_w", + skip_sql=True, + ), + id="count_multiple_filters_w", + ), pytest.param( PyDoughPandasTest( order_quarter_test, diff --git a/tests/test_plan_refsols/common_prefix_d.txt b/tests/test_plan_refsols/common_prefix_d.txt index 51618c64b..57dc6413b 100644 --- a/tests/test_plan_refsols/common_prefix_d.txt +++ b/tests/test_plan_refsols/common_prefix_d.txt @@ -1,23 +1,15 @@ -ROOT(columns=[('name', r_name), ('n_nations', n_rows), ('n_customers', sum_expr), ('n_suppliers', sum_n_rows), ('n_orders_94', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_expr_10, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_n_rows, 0:numeric))], orderings=[(r_name):asc_first]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_expr': t1.sum_n_rows, 'sum_n_rows': t1.sum_sum_n_rows_0, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_10': t1.sum_sum_expr_10, 'sum_sum_n_rows': t1.sum_sum_n_rows}) +ROOT(columns=[('name', r_name), ('n_nations', n_rows), ('n_customers', sum_sum_expr), ('n_suppliers', sum_n_rows), ('n_orders_94', DEFAULT_TO(sum_sum_sum_expr, 0:numeric)), ('n_orders_95', DEFAULT_TO(sum_sum_sum_expr_33, 0:numeric)), ('n_orders_96', DEFAULT_TO(sum_sum_sum_expr_34, 0:numeric))], orderings=[(r_name):asc_first]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_n_rows': t1.sum_n_rows, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_sum_expr': t1.sum_sum_sum_expr, 'sum_sum_sum_expr_33': t1.sum_sum_sum_expr_33, 'sum_sum_sum_expr_34': t1.sum_sum_sum_expr_34}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_10': SUM(sum_expr_10), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_n_rows_0': SUM(sum_n_rows_0)}) - JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_10': t0.sum_expr_10, 'sum_n_rows': t0.sum_n_rows, 'sum_n_rows_0': t1.n_rows}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr': t1.sum_expr_7, 'sum_expr_10': t1.sum_expr_10, 'sum_n_rows': t1.sum_n_rows}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows), 'sum_sum_expr': SUM(sum_expr_16), 'sum_sum_sum_expr': SUM(sum_sum_expr), 'sum_sum_sum_expr_33': SUM(sum_sum_expr_33), 'sum_sum_sum_expr_34': SUM(sum_sum_expr_34)}) + JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_expr_16': t0.n_rows, 'sum_sum_expr': t0.sum_sum_expr, 'sum_sum_expr_33': t0.sum_sum_expr_33, 'sum_sum_expr_34': t0.sum_sum_expr_34}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 
'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 'sum_sum_expr': t1.sum_sum_expr_35, 'sum_sum_expr_33': t1.sum_sum_expr_33, 'sum_sum_expr_34': t1.sum_sum_expr_34}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_expr_10': SUM(expr_10), 'sum_expr_7': SUM(expr_7), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'expr_10': t0.n_rows, 'expr_7': t0.expr_7, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'expr_7': t0.n_rows, 'n_rows': t1.n_rows}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_nationkey': t0.c_nationkey, 'n_rows': t1.n_rows}) - SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1994:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1995:numeric, columns={'o_custkey': o_custkey}) - SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey}) + AGGREGATE(keys={'c_nationkey': c_nationkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_33': SUM(sum_expr), 'sum_sum_expr_34': 
SUM(sum_expr_34), 'sum_sum_expr_35': SUM(sum_expr_35)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_nationkey': t0.c_nationkey, 'sum_expr': t1.sum_expr, 'sum_expr_34': t1.sum_expr_34, 'sum_expr_35': t1.sum_expr_35}) + SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(YEAR(o_orderdate) == 1995:numeric), 'sum_expr_34': SUM(YEAR(o_orderdate) == 1996:numeric), 'sum_expr_35': SUM(YEAR(o_orderdate) == 1994:numeric)}) + FILTER(condition=YEAR(o_orderdate) == 1994:numeric | YEAR(o_orderdate) == 1995:numeric | YEAR(o_orderdate) == 1996:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) AGGREGATE(keys={'s_nationkey': s_nationkey}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_g.txt b/tests/test_plan_refsols/count_multiple_filters_g.txt index 4ab4fbe1a..b3d5f2b80 100644 --- a/tests/test_plan_refsols/count_multiple_filters_g.txt +++ b/tests/test_plan_refsols/count_multiple_filters_g.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', agg_3), ('n5', agg_4), ('n6', sum_expr_9)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t0.agg_4, 'n_rows': t0.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_9': t1.sum_expr_9}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, 
columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) +ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr_9), ('n4', agg_3), ('n5', agg_4), ('n6', sum_expr)], orderings=[]) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_9': t0.sum_expr_9}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_9': t0.sum_expr_9}) + JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_9': t1.sum_expr_9}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) @@ -12,19 +12,19 @@ ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', agg_3), (' SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) - SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 
'CHINA':string, columns={'n_nationkey': n_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(n_name == 'CHINA':string), 'sum_expr_9': SUM(n_name == 'GERMANY':string)}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) + FILTER(condition=n_name == 'GERMANY':string | n_name == 'CHINA':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) + FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) - SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(n_name == 'GERMANY':string), 'sum_expr_9': SUM(n_name == 'CHINA':string)}) - JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name}) - SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'CHINA':string | n_name == 'GERMANY':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) 
+ FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) + SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) + FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_j.txt b/tests/test_plan_refsols/count_multiple_filters_j.txt index 91b2ee95a..87fe0dc34 100644 --- a/tests/test_plan_refsols/count_multiple_filters_j.txt +++ b/tests/test_plan_refsols/count_multiple_filters_j.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_16, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr_18, 'sum_sum_expr_16': t1.sum_sum_expr_16, 'sum_sum_expr_17': 
t1.sum_sum_expr_17}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_18': SUM(o_orderpriority == 
'1-URGENT':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_k.txt b/tests/test_plan_refsols/count_multiple_filters_k.txt index 21042aa6f..08c72a165 100644 --- a/tests/test_plan_refsols/count_multiple_filters_k.txt +++ b/tests/test_plan_refsols/count_multiple_filters_k.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': 
SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + FILTER(condition=sum_sum_expr_18 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string)}) + 
FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_l.txt b/tests/test_plan_refsols/count_multiple_filters_l.txt index 65a513867..b0a77c408 100644 --- a/tests/test_plan_refsols/count_multiple_filters_l.txt +++ b/tests/test_plan_refsols/count_multiple_filters_l.txt @@ -1,11 +1,11 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_sum_expr_16, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr_18, 'sum_sum_expr_16': t1.sum_sum_expr_16, 'sum_sum_expr_17': t1.sum_sum_expr_17}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, 
columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority 
== '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_m.txt b/tests/test_plan_refsols/count_multiple_filters_m.txt index 755afcac1..89bfded9a 100644 --- a/tests/test_plan_refsols/count_multiple_filters_m.txt +++ b/tests/test_plan_refsols/count_multiple_filters_m.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': 
COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + FILTER(condition=sum_sum_expr_18 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, 
columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_n.txt b/tests/test_plan_refsols/count_multiple_filters_n.txt index e2a8fc9e1..6de60fbac 100644 --- a/tests/test_plan_refsols/count_multiple_filters_n.txt +++ b/tests/test_plan_refsols/count_multiple_filters_n.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', sum_sum_expr_19), ('n4', DEFAULT_TO(sum_sum_expr_20, 0:numeric))], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', sum_sum_expr), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': 
t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_19 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_16 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == 
'2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_o.txt b/tests/test_plan_refsols/count_multiple_filters_o.txt index 00a2a6983..ff294f68f 100644 --- a/tests/test_plan_refsols/count_multiple_filters_o.txt +++ b/tests/test_plan_refsols/count_multiple_filters_o.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', DEFAULT_TO(sum_sum_expr_19, 0:numeric)), ('n4', sum_sum_expr_20)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': 
t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', sum_sum_expr_17)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_20 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_17 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == 
t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_p.txt b/tests/test_plan_refsols/count_multiple_filters_p.txt index f6735c444..5435e2d1c 100644 --- a/tests/test_plan_refsols/count_multiple_filters_p.txt +++ 
b/tests/test_plan_refsols/count_multiple_filters_p.txt @@ -1,12 +1,12 @@ -ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr), ('n3', sum_sum_expr_19), ('n4', sum_sum_expr_20)], orderings=[]) - JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_19': t1.sum_sum_expr_19, 'sum_sum_expr_20': t1.sum_sum_expr_20}) +ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', sum_sum_expr), ('n4', sum_sum_expr_17)], orderings=[]) + JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr != 0:numeric & sum_sum_expr_19 != 0:numeric & sum_sum_expr_20 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr, 'sum_sum_expr_19': sum_sum_expr_19, 'sum_sum_expr_20': sum_sum_expr_20}) - AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr': SUM(sum_expr), 'sum_sum_expr_19': SUM(sum_expr_19), 'sum_sum_expr_20': SUM(sum_expr_20)}) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_19': t1.sum_expr_19, 'sum_expr_20': t1.sum_expr_20}) + FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_16 != 0:numeric & sum_sum_expr_17 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) 
+ AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string), 'sum_expr_19': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_20': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string), 'sum_expr_17': SUM(o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string), 'sum_expr_18': SUM(o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '2-HIGH':string | o_orderpriority == 
'3-MEDIUM':string | o_orderpriority == '3-MEDIUM':string | o_orderpriority == '4-NOT SPECIFIED':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_q.txt b/tests/test_plan_refsols/count_multiple_filters_q.txt index 9fd0e560f..44e0cd1fc 100644 --- a/tests/test_plan_refsols/count_multiple_filters_q.txt +++ b/tests/test_plan_refsols/count_multiple_filters_q.txt @@ -1,7 +1,7 @@ -ROOT(columns=[('customer_key', c_custkey), ('n1', DEFAULT_TO(sum_expr, 0:numeric)), ('n2', DEFAULT_TO(sum_expr_9, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_10, 0:numeric))], orderings=[]) - AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) +ROOT(columns=[('customer_key', c_custkey), ('n1', DEFAULT_TO(sum_expr_8, 0:numeric)), ('n2', DEFAULT_TO(sum_expr, 0:numeric)), ('n3', DEFAULT_TO(sum_expr_7, 0:numeric))], orderings=[]) + AGGREGATE(keys={'c_custkey': c_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_7': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_8': SUM(o_orderpriority == '1-URGENT':string)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'o_orderpriority': t1.o_orderpriority}) FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=o_orderpriority == 
'1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_r.txt b/tests/test_plan_refsols/count_multiple_filters_r.txt index e98e49fc6..8eb890f2b 100644 --- a/tests/test_plan_refsols/count_multiple_filters_r.txt +++ b/tests/test_plan_refsols/count_multiple_filters_r.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_8), ('n2', sum_expr), ('n3', sum_expr_7)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_8': t1.sum_expr_8}) FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=sum_expr != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 
'o_orderpriority': o_orderpriority}) + FILTER(condition=sum_expr_8 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr, 'sum_expr_7': sum_expr_7, 'sum_expr_8': sum_expr_8}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_7': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_8': SUM(o_orderpriority == '1-URGENT':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_s.txt b/tests/test_plan_refsols/count_multiple_filters_s.txt index 3dc165039..24948bdf2 100644 --- a/tests/test_plan_refsols/count_multiple_filters_s.txt +++ b/tests/test_plan_refsols/count_multiple_filters_s.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_8), ('n2', sum_expr), ('n3', sum_expr_7)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_8': t1.sum_expr_8}) FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=sum_expr_9 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': 
sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=sum_expr != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr, 'sum_expr_7': sum_expr_7, 'sum_expr_8': sum_expr_8}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_7': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_8': SUM(o_orderpriority == '1-URGENT':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_t.txt b/tests/test_plan_refsols/count_multiple_filters_t.txt index 7730768d5..47e4d1beb 100644 --- a/tests/test_plan_refsols/count_multiple_filters_t.txt +++ b/tests/test_plan_refsols/count_multiple_filters_t.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', sum_expr)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_8), ('n2', sum_expr), ('n3', sum_expr_7)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_8': t1.sum_expr_8}) FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=sum_expr_10 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=sum_expr_7 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr, 'sum_expr_7': sum_expr_7, 'sum_expr_8': sum_expr_8}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_7': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_8': SUM(o_orderpriority == '1-URGENT':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == '3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_u.txt b/tests/test_plan_refsols/count_multiple_filters_u.txt index 2599b1b79..6713a1139 100644 --- a/tests/test_plan_refsols/count_multiple_filters_u.txt +++ b/tests/test_plan_refsols/count_multiple_filters_u.txt @@ -1,8 +1,8 @@ -ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_7), ('n2', sum_expr_9), ('n3', 
sum_expr)], orderings=[]) - JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_9': t1.sum_expr_9}) +ROOT(columns=[('customer_key', c_custkey), ('n1', sum_expr_8), ('n2', sum_expr), ('n3', sum_expr_7)], orderings=[]) + JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'sum_expr': t1.sum_expr, 'sum_expr_7': t1.sum_expr_7, 'sum_expr_8': t1.sum_expr_8}) FILTER(condition=ISIN(c_custkey, [2, 3, 4, 17, 23, 26, 380, 827]:array[unknown]), columns={'c_custkey': c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey}) - FILTER(condition=sum_expr != 0:numeric & sum_expr_10 != 0:numeric & sum_expr_9 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr_10, 'sum_expr_7': sum_expr, 'sum_expr_9': sum_expr_9}) - AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '1-URGENT':string), 'sum_expr_10': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_9': SUM(o_orderpriority == '2-HIGH':string)}) - FILTER(condition=o_orderpriority == '3-MEDIUM':string | o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) + FILTER(condition=sum_expr != 0:numeric & sum_expr_7 != 0:numeric & sum_expr_8 != 0:numeric, columns={'o_custkey': o_custkey, 'sum_expr': sum_expr, 'sum_expr_7': sum_expr_7, 'sum_expr_8': sum_expr_8}) + AGGREGATE(keys={'o_custkey': o_custkey}, aggregations={'sum_expr': SUM(o_orderpriority == '2-HIGH':string), 'sum_expr_7': SUM(o_orderpriority == '3-MEDIUM':string), 'sum_expr_8': SUM(o_orderpriority == '1-URGENT':string)}) + FILTER(condition=o_orderpriority == '1-URGENT':string | o_orderpriority == '2-HIGH':string | o_orderpriority == 
'3-MEDIUM':string, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/count_multiple_filters_v.txt b/tests/test_plan_refsols/count_multiple_filters_v.txt new file mode 100644 index 000000000..a16d155c4 --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_v.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n2', sum_expr), ('n3', sum_expr_4)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)), 'sum_expr_4': SUM(NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)))}) + FILTER(condition=c_mktsegment == 'BUILDING':string | NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) & c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 1000:numeric) | NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) & c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal}) + SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_w.txt b/tests/test_plan_refsols/count_multiple_filters_w.txt new file mode 100644 index 000000000..c2e546a0a --- /dev/null +++ b/tests/test_plan_refsols/count_multiple_filters_w.txt @@ -0,0 +1,4 @@ +ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_14), ('n3', sum_expr_15), ('n4', sum_expr_16), ('n5', sum_expr_17), ('n6', sum_expr_12)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(STARTSWITH(c_phone, '30':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_12': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string), 'sum_expr_14': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_15': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_16': SUM(STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string), 
'sum_expr_17': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string)}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string) | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql index 0bc5f4bfb..5e4386913 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_ansi.sql @@ -2,9 +2,7 @@ SELECT SUM(LOWER(food_type) = 'vegan') / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - ( - LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND ( LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql index 0bc5f4bfb..5e4386913 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_mysql.sql @@ -2,9 +2,7 @@ SELECT SUM(LOWER(food_type) = 'vegan') / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - ( - LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND ( LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql 
b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql index 5b65bdcb0..b8b8a7938 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_postgres.sql @@ -2,9 +2,7 @@ SELECT CAST(SUM(CASE WHEN LOWER(food_type) = 'vegan' THEN 1 ELSE 0 END) AS DOUBLE PRECISION) / NULLIF(SUM(CASE WHEN LOWER(food_type) <> 'vegan' THEN 1 ELSE 0 END), 0) AS ratio FROM main.restaurant WHERE - ( - LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND ( LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql index 1de217128..11850beb4 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_snowflake.sql @@ -2,9 +2,7 @@ SELECT COUNT_IF(LOWER(food_type) = 'vegan') / NULLIF(COUNT_IF(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - ( - LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND ( LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' ) diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index f17affb11..d03d55431 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -2,9 +2,7 @@ SELECT CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - ( - LOWER(city_name) = 'san francisco' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND ( LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' ) From 509e1596d7fdf0759dd0dbf1a5fc9d79712853e6 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 9 Feb 2026 11:08:39 
-0800 Subject: [PATCH 18/22] Resolving testing and deletion issues --- pydough/conversion/hybrid_filter_merger.py | 2 +- pydough/conversion/hybrid_tree.py | 2 +- tests/test_masked_sf.py | 11 ++- tests/test_masked_sqlite.py | 20 +++++ .../retail_transactions_ts_raw.txt | 24 +----- .../retail_transactions_ts_rewrite.txt | 24 +----- .../retail_transactions_ts_raw_snowflake.sql | 74 +++++++++---------- ...tail_transactions_ts_rewrite_snowflake.sql | 56 +++++--------- 8 files changed, 90 insertions(+), 123 deletions(-) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index c780a1bb4..8154daf35 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -348,7 +348,7 @@ def get_filter_stripped_form(self, tree: HybridTree) -> str: for expr in operation.new_expressions.values() ): break - return repr(stripped_tree) + return repr(stripped_tree) + f" {stripped_tree.join_keys}" def make_filter_dag( self, diff --git a/pydough/conversion/hybrid_tree.py b/pydough/conversion/hybrid_tree.py index aeea1bf38..87a043d59 100644 --- a/pydough/conversion/hybrid_tree.py +++ b/pydough/conversion/hybrid_tree.py @@ -976,7 +976,7 @@ def remove_dead_children(self, must_remove: set[int]) -> dict[int, int]: continue if ( self.children[child_idx].connection_type.is_semi - and not self.children[child_idx].subtree.always_exists() + and not self.children[child_idx].get_always_exists() ) or self.children[child_idx].connection_type.is_anti: children_to_delete.discard(child_idx) diff --git a/tests/test_masked_sf.py b/tests/test_masked_sf.py index 6ab47728f..6d288e06f 100644 --- a/tests/test_masked_sf.py +++ b/tests/test_masked_sf.py @@ -1264,19 +1264,18 @@ def test_pipeline_e2e_masked_sf( [ { "DRY_RUN", - "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6]", "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 
15, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", - "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970, 'EQUAL', 2, 'MONTH', 1, '__col__', 6]", "bodo/fsi/protected_customers/dob: ['AND', 3, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'DAY', 1, '__col__', 15]", "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'MONTH', 1, '__col__', 6]", "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['OR', 2, 'AND', 2, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", }, { - "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6]", - "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", - "bodo/fsi/protected_customers/dob: ['AND', 2, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", - "bodo/fsi/protected_customers/dob: ['AND', 3, 'EQUAL', 2, 'DAY', 1, '__col__', 15, 'EQUAL', 2, 'MONTH', 1, '__col__', 6, 'EQUAL', 2, 'YEAR', 1, '__col__', 1970]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'DAY', 1, '__col__', 15]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'MONTH', 1, '__col__', 6]", + "bodo/fsi/protected_customers/dob: ['EQUAL', 2, 'YEAR', 1, '__col__', 1970]", }, ], id="fsi_customers_b", diff --git a/tests/test_masked_sqlite.py b/tests/test_masked_sqlite.py index 89e83a25e..ef0c858e7 100644 --- a/tests/test_masked_sqlite.py +++ b/tests/test_masked_sqlite.py @@ -1479,6 +1479,8 @@ def test_pipeline_e2e_cryptbank( "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 
2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'CONTAINS', 2, '__col__', 'a', 'OR', 4, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 4, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { @@ -1523,6 +1525,8 @@ def test_pipeline_e2e_cryptbank( "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'OR', 4, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u', 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 4, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u', 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { @@ -1594,6 +1598,8 @@ def test_pipeline_e2e_cryptbank( "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'OR', 3, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, 
'__col__', 'u', 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 3, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'AND', 2, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'u', 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'i']", "DRY_RUN", }, { @@ -1624,16 +1630,25 @@ def test_pipeline_e2e_cryptbank( "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'a']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'OR', 2, 'AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'AND', 3, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 'AND', 3, 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o', 
'CONTAINS', 2, '__col__', 'u']", "DRY_RUN", }, { + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'o', 'CONTAINS', 2, '__col__', 'u']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['AND', 3, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'CONTAINS', 2, '__col__', 'o']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'u']", @@ -1665,12 +1680,17 @@ def test_pipeline_e2e_cryptbank( "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['OR', 2, 'AND', 2, 'NOT', 1, 'AND', 2, " + "'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i', 'AND', 2, 'CONTAINS', 2, '__col__', 'o', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, '__col__', 'i']", "DRY_RUN", }, { "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'i', 'NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'o']", "CRBNK/CUSTOMERS/c_fname: ['NOT', 1, 'AND', 2, 'CONTAINS', 2, '__col__', 'e', 'CONTAINS', 2, 
'__col__', 'i']", + "CRBNK/CUSTOMERS/c_fname: ['AND', 2, 'CONTAINS', 2, '__col__', 'a', 'CONTAINS', 2, '__col__', 'e']", + "CRBNK/CUSTOMERS/c_fname: ['CONTAINS', 2, '__col__', 'i']", }, ], id="cryptbank_multi_fcount_06", diff --git a/tests/test_plan_refsols/retail_transactions_ts_raw.txt b/tests/test_plan_refsols/retail_transactions_ts_raw.txt index 46d76c896..84d1b5d73 100644 --- a/tests/test_plan_refsols/retail_transactions_ts_raw.txt +++ b/tests/test_plan_refsols/retail_transactions_ts_raw.txt @@ -1,20 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric, columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric, columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric, columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric, columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric, columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) +ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_10), ('n3', sum_expr_11), ('n4', sum_expr_12), ('n5', sum_expr_13)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_10': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_11': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_12': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_13': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 
3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric)}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric, columns={'transaction_date': transaction_date}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) diff --git a/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt index a03d36ac8..0a01ae0a8 100644 --- a/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt @@ -1,20 +1,4 @@ -ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', agg_2), ('n4', agg_3), ('n5', agg_4)], orderings=[]) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': t0.agg_3, 'agg_4': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t0.agg_2, 'agg_3': 
t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'agg_2': t1.n_rows, 'n_rows': t0.n_rows}) - JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown]), columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown]), columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown]), columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown]), columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 
01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]), columns={}) - SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) +ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_10), ('n3', sum_expr_11), ('n4', sum_expr_12), ('n5', sum_expr_13)], orderings=[]) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown])), 'sum_expr_10': SUM(ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown])), 'sum_expr_11': SUM(ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown])), 'sum_expr_12': SUM(ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown])), 'sum_expr_13': SUM(ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', 
'1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]))}) + FILTER(condition=ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown]) | ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown]) | ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown]) | ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown]) | ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', 
'2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]), columns={'transaction_date': transaction_date}) + SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) diff --git a/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql b/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql index f4a984e71..4a8507776 100644 --- a/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_ts_raw_snowflake.sql @@ -1,50 +1,50 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE +SELECT + COUNT_IF( DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n1, + COUNT_IF( DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 AND 
HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n2, + COUNT_IF( DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 -), _s5 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n3, + COUNT_IF( DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n4, + COUNT_IF( ( DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 4 OR MINUTE(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = SECOND(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) ) AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 3 -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s3.n_rows AS n3, - _s5.n_rows AS n4, - _s7.n_rows AS n5 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s5 AS _s5 -CROSS JOIN _s7 AS _s7 + ) AS n5 +FROM bodo.retail.transactions +WHERE + ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 4 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 3 + ) + OR ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 + ) + OR ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 1 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 + ) + OR ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 7 + ) + OR ( + DAY(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 2 + AND HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = 8 + ) + OR ( + HOUR(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) < 3 + AND 
MINUTE(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) = SECOND(CAST(PTY_UNPROTECT_TS(transaction_date) AS TIMESTAMP)) + ) diff --git a/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql b/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql index b8cd69ee9..6eeb2a4cf 100644 --- a/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql +++ b/tests/test_sql_refsols/retail_transactions_ts_rewrite_snowflake.sql @@ -1,43 +1,23 @@ -WITH _s0 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE - transaction_date IN ('2178-03-20 07:19:29', '2825-09-23 07:37:08') -), _s1 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE +SELECT + COUNT_IF(transaction_date IN ('2178-03-20 07:19:29', '2825-09-23 07:37:08')) AS n1, + COUNT_IF( transaction_date IN ('1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03') -), _s3 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n2, + COUNT_IF( transaction_date IN ('3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51') -), _s5 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n3, + COUNT_IF( transaction_date IN ('2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08') -), _s7 AS ( - SELECT - COUNT(*) AS n_rows - FROM bodo.retail.transactions - WHERE + ) AS n4, + COUNT_IF( transaction_date IN ('1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32') OR transaction_date IN ('2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 
02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01') -) -SELECT - _s0.n_rows AS n1, - _s1.n_rows AS n2, - _s3.n_rows AS n3, - _s5.n_rows AS n4, - _s7.n_rows AS n5 -FROM _s0 AS _s0 -CROSS JOIN _s1 AS _s1 -CROSS JOIN _s3 AS _s3 -CROSS JOIN _s5 AS _s5 -CROSS JOIN _s7 AS _s7 + ) AS n5 +FROM bodo.retail.transactions +WHERE + transaction_date IN ('1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03') + OR transaction_date IN ('1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32') + OR transaction_date IN ('2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08') + OR transaction_date IN ('2178-03-20 07:19:29', '2825-09-23 07:37:08') + OR transaction_date IN ('2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', 
'2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01') + OR transaction_date IN ('3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51') From 974eaf6000f7ebd35d1b4c61e096d1579bc07724 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 9 Feb 2026 12:26:41 -0800 Subject: [PATCH 19/22] Documentation and cleanup --- pydough/conversion/hybrid_expressions.py | 29 ++ pydough/conversion/hybrid_filter_merger.py | 262 ++++++++++++++---- .../count_multiple_filters_w.txt | 2 +- 3 files changed, 235 insertions(+), 58 deletions(-) diff --git a/pydough/conversion/hybrid_expressions.py b/pydough/conversion/hybrid_expressions.py index d28a78286..8b8f19843 100644 --- a/pydough/conversion/hybrid_expressions.py +++ b/pydough/conversion/hybrid_expressions.py @@ -14,10 +14,12 @@ "HybridFunctionExpr", "HybridLiteralExpr", "HybridRefExpr", + "make_condition", ] import copy from abc import ABC, abstractmethod +from collections.abc import Collection import pydough.pydough_operators as pydop from pydough.qdag import ( @@ -766,3 +768,30 @@ def expand_sided(self, shift: int) -> HybridExpr: self.typ, self.kwargs, ) + + +def make_condition( + expressions: Collection[HybridExpr], conjunction: bool +) -> HybridExpr: + """ + Converts a list of expressions into a composite boolean expression, 
either + a conjunction or disjunction. If the list is empty, returns a literal True + expression. + + Args: + `expressions`: the expressions to combine into a condition. + `conjunction`: whether to combine the expressions using AND (if True) or + OR (if False). + + Returns: + The combined condition expression. + """ + if not expressions: + return HybridLiteralExpr(Literal(True, BooleanType())) + if len(expressions) == 1: + return next(iter(expressions)) + if conjunction: + return HybridFunctionExpr( + pydop.BAN, sorted(expressions, key=repr), BooleanType() + ) + return HybridFunctionExpr(pydop.BOR, sorted(expressions, key=repr), BooleanType()) diff --git a/pydough/conversion/hybrid_filter_merger.py b/pydough/conversion/hybrid_filter_merger.py index 8154daf35..9062ab84f 100644 --- a/pydough/conversion/hybrid_filter_merger.py +++ b/pydough/conversion/hybrid_filter_merger.py @@ -18,6 +18,7 @@ HybridExpr, HybridFunctionExpr, HybridLiteralExpr, + make_condition, ) from .hybrid_operations import ( HybridCalculate, @@ -40,40 +41,63 @@ def __init__(self, translator: "HybridTranslator") -> None: def merge_filters(self, tree: HybridTree) -> None: """ - TODO + The main protocol that runs the filter merging procedure on the given + tree with regards to its children, then recursively invokes the same + procedure on the rest of the tree. """ - # Keep a set of all children that are marked for certain deletion.\ + # Keep a set of all children that are marked for certain deletion. must_delete: set[int] = set() # Run the main procedure on subtrees with multiple children. if len(tree.children) > 1: - # Identify which children are only used by a COUNT aggregation that is - # not ONLY_MATCH. + # Identify which children are only used by a COUNT aggregation that + # is not ONLY_MATCH. 
mergeable_children: set[int] = self.identify_mergeable_children(tree) - # TODO ADD COMMENT + # Extract the set of filters in the bottom level of each child tree, + # only considering filters after a critical point (limits, windows). child_filters: list[set[HybridExpr]] = [ self.get_final_filters(child.subtree) for child in tree.children ] - # TODO ADD COMMENT + # Obtain a mapping from each child to the set of all other children + # in the tree that are isomorphic to it excluding filters after + # the critical point. child_isomorphisms: list[set[int]] = self.get_child_isomorphisms(tree) - # TODO ADD COMMENT + # Create a DAG of each mergeable child to another child that it is + # isomorphic to except that the other child has a strict subset of + # its filters, if such a child exists, otherwise None. Path + # compression is used to ensure there is no daisy chain. filter_dag: list[int | None] = self.make_filter_dag( mergeable_children, child_filters, child_isomorphisms ) - # TODO ADD COMMENT + # Create a secondary mapping to indicate pools of children that were + # not merged by the dag because there was no child with a filter + # subset relationship, but are still isomorphic to one another. + # These are stored in the form of a pool of isomorphic children, + # where one member of the pool is the key and the rest are the + # value. secondary_merges: dict[int, set[int]] = self.make_secondary_merges( mergeable_children, child_isomorphisms, filter_dag ) - # TODO ADD COMMENT + # Build up a dictionary indicating all COUNT(*) references in the + # tree that have been replaced with a SUM(cond) reference in a + # different child of the tree. replacement_map: dict[HybridExpr, HybridExpr] = {} + + # For each pair (source -> target) in the filter DAG, run the + # basic merging procedure. for source_idx, target_idx in enumerate(filter_dag): + # Make sure the source maps to a target, as opposed to None. 
if target_idx is None: continue + # Identify all the filters in the source and not the child, + # and vice versa. There should be at least 1 extra filter in + # the source, but no extra filters in the target since it is + # a subset relationship. extra_source_filters: set[HybridExpr] = ( child_filters[source_idx] - child_filters[target_idx] ) @@ -81,6 +105,8 @@ def merge_filters(self, tree: HybridTree) -> None: child_filters[target_idx] - child_filters[source_idx] ) assert len(extra_source_filters) > 0 and len(extra_target_filters) == 0 + # Run the merge subset filter procedure for this source and + # target, updating the replacement map and deletion set. self.merge_subset_filters( tree, source_idx, @@ -90,6 +116,8 @@ def merge_filters(self, tree: HybridTree) -> None: must_delete, ) + # For each (target <- source_pool), run the more advanced algorithm + # which combines multiple children with distinct sets of filters. for target_idx, source_idxs in secondary_merges.items(): self.merge_partial_disjoint_filters( tree, @@ -100,10 +128,12 @@ def merge_filters(self, tree: HybridTree) -> None: must_delete, ) - # TODO ADD COMMENT + # Replace all of the COUNT(*) terms in the current tree from a + # remapped child with the new SUM expression. for operation in tree.pipeline: operation.replace_expressions(replacement_map) + # Before moving on, we need to remove any dead children from the tree. tree.remove_dead_children(must_delete) # Run the procedure recursively on the parent tree and the child @@ -123,17 +153,28 @@ def merge_subset_filters( must_delete: set[int], ) -> None: """ - TODO + Run the merging procedure on a source and target child where the source + has a strict superset of filters compared to the target, and the same + underlying aggregation structure, meaning the source can be merged into + the target. + + Args: + `tree`: The tree whose children are being merged. + `source_idx`: The index of the source child that is being merged. 
+ `target_idx`: The index of the target child that is being merged + into. + `extra_source_filters`: The set of filters in the source child that + are not in the target child. + `replacement_map`: A mapping that must be updated to indicate any + references to the old source child and a new reference in the target + child that the should be remapped to. + `must_delete`: A set of child indices that must be updated to + include the source child index, since it will be merged into the + target and therefore removed. """ - new_cond: HybridExpr - if len(extra_source_filters) == 1: - new_cond = next(iter(extra_source_filters)) - else: - new_cond = HybridFunctionExpr( - pydop.BAN, - sorted(extra_source_filters, key=repr), - BooleanType(), - ) + # Build a new aggregation SUM(IFF(conj, 1, 0)) where conj is the + # conjunction of all the extra filters from the source subtree. + new_cond: HybridExpr = make_condition(extra_source_filters, True) numeric_expr: HybridExpr = HybridFunctionExpr( pydop.IFF, [ @@ -148,6 +189,9 @@ def merge_subset_filters( [numeric_expr], NumericType(), ) + # Insert the new aggregation into the target subtree, and update the + # replacement map to point from the old COUNT(*) reference in the source + # subtree to the new SUM expression reference in the target subtree. agg_name: str = self.translator.gen_agg_name(tree.children[target_idx]) tree.children[target_idx].aggs[agg_name] = sum_expr agg_ref: HybridExpr = HybridChildRefExpr(agg_name, target_idx, NumericType()) @@ -161,6 +205,9 @@ def merge_subset_filters( NumericType(), ) replacement_map[old_agg_ref] = agg_ref + + # Update the min/max steps of the target subtree to indicate overlap + # with the source subtree. 
tree.children[target_idx].max_steps = min( tree.children[target_idx].max_steps, tree.children[source_idx].max_steps, @@ -188,6 +235,9 @@ def merge_subset_filters( ) ) + # Finally, mark the source child for deletion since it has now been + # merged into the target child, unless they are the same child (see + # the special case in `merge_partial_disjoint_filters`). if source_idx != target_idx: must_delete.add(source_idx) @@ -201,14 +251,38 @@ def merge_partial_disjoint_filters( must_delete: set[int], ) -> None: """ - TODO + Run the merging procedure on a pool of multiple source children that + are isomorphic to the target child, but where there is no subset + relationship. This is done by transforming the target child to have + a disjunction of all the filters from the source children, then + making all of the COUNT(*) calls from the different target/sources + be on all the filters from that specific child that are not in all of + the other children. + + Args: + `tree`: The tree whose children are being merged. + `target_idx`: The index of the target child that is being merged + into. + `source_idxs`: The set of indices of the source children that are + being merged into the target. + `all_filters`: A list of the sets of filters in each child subtree + after the critical point. + `replacement_map`: A mapping that must be updated to indicate any + references to the old source child and a new reference in the target + child that the should be remapped to. + `must_delete`: A set of child indices that must be updated to + include the source child indices, since they will be merged into the + target and therefore removed. """ - # TODO ADD COMMENTS + # Identify any filters that are in all the children, since these can be + # ignored when creating the new conditions for the aggregations. 
intersection = set.intersection( *(all_filters[source_idx] for source_idx in source_idxs), all_filters[target_idx], ) + # For each of the source children, merge it onto the target child as if + # it were a subset merge. for source_idx in sorted(source_idxs): extra_source_filters: set[HybridExpr] = ( all_filters[source_idx] - intersection @@ -222,7 +296,9 @@ def merge_partial_disjoint_filters( must_delete, ) - # TODO ADD COMMENTS + # Merge the target child onto itself using the extra filters it has, so + # that its own COUNT(*) is replaced with a SUM over the filters that it + # has that are not in all the other children. extra_target_filters: set[HybridExpr] = all_filters[target_idx] - intersection self.merge_subset_filters( tree, @@ -233,42 +309,27 @@ def merge_partial_disjoint_filters( must_delete, ) - # TODO: ADD COMMENTS - new_conds: list[HybridExpr] = [] + # Build up a list of the conjunctions for each source child, which will + # be used to create the new disjunctive condition for the target + # subtree. + source_conjunctions: list[HybridExpr] = [] for source_idx in sorted(source_idxs): - source_filters: set[HybridExpr] = all_filters[source_idx] - source_cond: HybridExpr - if len(source_filters) == 1: - source_cond = next(iter(source_filters)) - else: - source_cond = HybridFunctionExpr( - pydop.BAN, - sorted(source_filters, key=repr), - BooleanType(), - ) - new_conds.append(source_cond) - - # TODO: ADD COMMENTS - new_cond: HybridExpr - if len(new_conds) == 1: - new_cond = new_conds[0] - else: - new_cond = HybridFunctionExpr( - pydop.BOR, - new_conds, - BooleanType(), - ) + source_cond: HybridExpr = make_condition(all_filters[source_idx], True) + source_conjunctions.append(source_cond) + + # Build a disjunction of the conjunctions from the source children. 
+ new_disjunction: HybridExpr = make_condition(source_conjunctions, False) # Now go back through the target subtree, find any existing filters # after any window/limit, and make them a disjunction of the existing - # filter and the new filter conditions. + # filters and the disjunction of source conjunctions. for operation in reversed(tree.children[target_idx].subtree.pipeline): if isinstance(operation, HybridFilter): if operation.condition.contains_window_functions(): break operation.condition = HybridFunctionExpr( pydop.BOR, - [operation.condition, new_cond], + [operation.condition, new_disjunction], BooleanType(), ) elif isinstance(operation, HybridLimit): @@ -282,7 +343,15 @@ def merge_partial_disjoint_filters( def identify_mergeable_children(self, tree: HybridTree) -> set[int]: """ - TODO + Identify the subset of child indices from a hybrid tree where the child + is an aggregation where the only aggregate is a single COUNT(*). + + Args: + `tree`: The tree whose children we are checking. + + Returns: + A set of the indices of the children that are only used by a COUNT + aggregation that is not ONLY_MATCH. """ return { idx @@ -296,7 +365,14 @@ def identify_mergeable_children(self, tree: HybridTree) -> set[int]: def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: """ - TODO + Identify the set of all filter conditions that appear in the current + tree's pipeline after any critical points (limits or window functions). + + Args: + `tree`: The tree whose pipeline we are checking. + + Returns: + The set of filters. """ result: set[HybridExpr] = set() for operation in reversed(tree.pipeline): @@ -316,8 +392,19 @@ def get_final_filters(self, tree: HybridTree) -> set[HybridExpr]: def get_child_isomorphisms(self, tree: HybridTree) -> list[set[int]]: """ - TODO + Return a datastructure mapping each child index to the set of all other + child indices that have the same canonical form after stripping away all + filters after any critical points. 
+ + Args: + `tree`: The tree whose children we are checking. + + Returns: + A list where the i'th element is the set of all other child indices + that are isomorphic to the i'th child after stripping away all + filters after any critical points. """ + # Extract the canonical forms filter_stripped_forms: list[str] = [ self.get_filter_stripped_form(child.subtree) for child in tree.children ] @@ -332,9 +419,23 @@ def get_child_isomorphisms(self, tree: HybridTree) -> list[set[int]]: def get_filter_stripped_form(self, tree: HybridTree) -> str: """ - TODO + Create a canonical string representation of the tree structure for the + hybrid tree after stripping away all filters after any critical + points (limits or window functions). Also includes the join keys, so + as to ensure that the canonical form reflects the same join + conditions. + + Args: + `tree`: The tree whose canonical form we are computing. + + Returns: + The canonical form as a string. """ + # Make a clone of the tree stripped_tree = copy.deepcopy(tree) + + # Go backwards in the tree pipeline and remove all filters until + # reaching a window function or limit. for idx, operation in reversed(list(enumerate(stripped_tree.pipeline))): if isinstance(operation, HybridFilter): if operation.condition.contains_window_functions(): @@ -348,6 +449,9 @@ def get_filter_stripped_form(self, tree: HybridTree) -> str: for expr in operation.new_expressions.values() ): break + + # Return the string form of the transformed tree along with its + # join keys. return repr(stripped_tree) + f" {stripped_tree.join_keys}" def make_filter_dag( @@ -357,9 +461,32 @@ def make_filter_dag( child_isomorphisms: list[set[int]], ) -> list[int | None]: """ - TODO + Create a DAG mapping each child onto another child in the subtree such + that the source child is mergeable, the target child has a subset of the + filters of the source child, and the two children are isomorphic after + stripping away filters. 
If no such mapping exists for a child, it maps to + None. Path compression is used to ensure there are no daisy chains, so + that if A maps to B and B maps to C, then A will map directly to C. + + Args: + `mergeable_children`: The set of child indices that are mergeable. + `child_filters`: A list of the sets of filters in each child subtree + after the critical point. + `child_isomorphisms`: A list where the i'th element is the set of all + other child indices that are isomorphic to the i'th child after + stripping away all filters after any critical points. + + Returns: + A list where the i'th element is either None if there is no child + that the i'th child can be merged into, or the index of a child that + the i'th child can be merged into, meaning that the i'th child has a + strict superset of filters compared to that child, and they are + isomorphic after stripping away filters. """ + # Build up the initial DAG as all-None, then fill in as connections + # are formed. dag: list[int | None] = [None for _ in range(len(child_filters))] + # Build initial edges from each mergeable child to another isomorphic # child that is a subset of its filter list. for idx in mergeable_children: @@ -368,7 +495,7 @@ def make_filter_dag( dag[idx] = other_idx break - # Collapse transitive edges + # Collapse transitive edges with path compression. for idx in range(len(dag)): if dag[idx] is not None: while True: @@ -385,13 +512,34 @@ def make_secondary_merges( filter_dag: list[int | None], ) -> dict[int, set[int]]: """ - TODO + Form the datastructure for the secondary merges, which is a mapping + from a child index serving as a target, to a set of child indices + serving as a pool of sources to merge into it, where the target and + all sources must be mergeable, isomorphic, and not be used as a + source or sink in the DAG. + + Args: + `mergeable_children`: The set of child indices that are mergeable. 
+ `child_isomorphisms`: A list where the i'th element is the set of + all other child indices that are isomorphic to the i'th child after + stripping away all filters after any critical points. + `filter_dag`: A list where the i'th element is either None if there + is no child that the i'th child can be merged into, or the index of + a child that the i'th child can be merged into, meaning that the + i'th child has a strict superset of filters compared to that child, + and they are isomorphic after stripping away filters. + + Returns: + A mapping from a child index serving as a target, to a set of child + indices serving as a pool of sources to merge into it, where the + target and all sources must be mergeable, isomorphic, and not be + used as a source or sink in the DAG. """ secondary_merges: dict[int, set[int]] = {} # Form secondary edges between island nodes that are not subsets of # one another but where both of them are mergeable, and neither one is - # the sink of an edge yet. + # the sink of an edge yet, or has been used as a source yet. 
existing_sinks: set[int | None] = set(filter_dag) already_merged: set[int] = set() for idx in mergeable_children: diff --git a/tests/test_plan_refsols/count_multiple_filters_w.txt b/tests/test_plan_refsols/count_multiple_filters_w.txt index c2e546a0a..3860b0b1f 100644 --- a/tests/test_plan_refsols/count_multiple_filters_w.txt +++ b/tests/test_plan_refsols/count_multiple_filters_w.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_14), ('n3', sum_expr_15), ('n4', sum_expr_16), ('n5', sum_expr_17), ('n6', sum_expr_12)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr': SUM(STARTSWITH(c_phone, '30':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_12': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string), 'sum_expr_14': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_15': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_16': SUM(STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string), 'sum_expr_17': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string)}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string) | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string) | STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 
'BUILDING':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) From d3e33b7c7326ec18598b55064b58d04430db604d Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Mon, 9 Feb 2026 23:29:45 -0800 Subject: [PATCH 20/22] WIP overhauls --- .../relational_expressions/call_expression.py | 7 ++++++- .../alternative_quarter_cum_ir_analysis.txt | 6 +++--- tests/test_plan_refsols/common_prefix_ad.txt | 2 +- tests/test_plan_refsols/common_prefix_ag.txt | 8 ++++---- tests/test_plan_refsols/common_prefix_ah.txt | 2 +- tests/test_plan_refsols/common_prefix_ai.txt | 8 ++++---- tests/test_plan_refsols/common_prefix_aj.txt | 8 ++++---- tests/test_plan_refsols/common_prefix_ak.txt | 8 ++++---- tests/test_plan_refsols/common_prefix_ap.txt | 2 +- tests/test_plan_refsols/common_prefix_aq.txt | 6 +++--- tests/test_plan_refsols/common_prefix_q.txt | 2 +- tests/test_plan_refsols/common_prefix_r.txt | 2 +- tests/test_plan_refsols/common_prefix_s.txt | 2 +- tests/test_plan_refsols/correl_16.txt | 2 +- tests/test_plan_refsols/correl_20.txt | 2 +- tests/test_plan_refsols/correl_24.txt | 2 +- tests/test_plan_refsols/correl_25.txt | 2 +- tests/test_plan_refsols/correl_26.txt | 2 +- tests/test_plan_refsols/correl_27.txt | 2 +- tests/test_plan_refsols/correl_28.txt | 2 +- tests/test_plan_refsols/correl_3.txt | 4 ++-- tests/test_plan_refsols/correl_31.txt | 2 +- tests/test_plan_refsols/correl_34.txt | 4 ++-- tests/test_plan_refsols/correl_35.txt | 2 +- tests/test_plan_refsols/correl_36.txt | 2 +- .../count_multiple_filters_e.txt | 2 +- .../count_multiple_filters_f.txt | 2 +- .../count_multiple_filters_g.txt | 10 +++++----- .../count_multiple_filters_n.txt | 2 +- .../count_multiple_filters_o.txt | 2 +- .../count_multiple_filters_p.txt | 2 +- .../count_multiple_filters_v.txt | 2 +- .../count_multiple_filters_w.txt | 4 ++-- 
.../country_x_year_analysis.txt | 6 +++--- tests/test_plan_refsols/cryptbank_agg_03_raw.txt | 4 ++-- .../cryptbank_agg_03_rewrite.txt | 4 ++-- tests/test_plan_refsols/cryptbank_agg_07_raw.txt | 2 +- .../cryptbank_agg_07_rewrite.txt | 2 +- .../cryptbank_analysis_01_raw.txt | 4 ++-- .../cryptbank_analysis_01_rewrite.txt | 4 ++-- .../cryptbank_analysis_02_raw.txt | 4 ++-- .../cryptbank_analysis_02_rewrite.txt | 4 ++-- .../cryptbank_analysis_03_raw.txt | 8 ++++---- .../cryptbank_analysis_03_rewrite.txt | 8 ++++---- .../cryptbank_analysis_04_raw.txt | 2 +- .../cryptbank_analysis_04_rewrite.txt | 2 +- .../cryptbank_filter_count_11_raw.txt | 4 ++-- .../cryptbank_filter_count_11_rewrite.txt | 6 +++--- .../cryptbank_filter_count_12_raw.txt | 2 +- .../cryptbank_filter_count_12_rewrite.txt | 2 +- .../cryptbank_filter_count_13_raw.txt | 2 +- .../cryptbank_filter_count_13_rewrite.txt | 2 +- .../cryptbank_filter_count_15_rewrite.txt | 2 +- .../cryptbank_filter_count_24_rewrite.txt | 2 +- .../cryptbank_filter_count_27_raw.txt | 2 +- .../cryptbank_filter_count_27_rewrite.txt | 2 +- .../cryptbank_filter_count_28_raw.txt | 4 ++-- .../cryptbank_filter_count_28_rewrite.txt | 4 ++-- .../cryptbank_filter_count_34_raw.txt | 2 +- .../cryptbank_filter_count_34_rewrite.txt | 2 +- .../cryptbank_general_join_01_raw.txt | 8 ++++---- .../cryptbank_general_join_01_rewrite.txt | 8 ++++---- .../cryptbank_general_join_02_raw.txt | 4 ++-- .../cryptbank_general_join_02_rewrite.txt | 4 ++-- .../cryptbank_window_01_raw.txt | 2 +- .../cryptbank_window_01_rewrite.txt | 2 +- .../cryptbank_window_02_raw.txt | 2 +- .../cryptbank_window_02_rewrite.txt | 2 +- tests/test_plan_refsols/deep_best_analysis.txt | 14 +++++++------- tests/test_plan_refsols/double_cross.txt | 2 +- .../epoch_culture_events_info.txt | 2 +- .../epoch_events_per_season.txt | 2 +- .../epoch_first_event_per_era.txt | 2 +- .../epoch_intra_season_searches.txt | 10 +++++----- .../epoch_most_popular_search_engine_per_tod.txt | 2 +- 
.../epoch_most_popular_topic_per_region.txt | 2 +- ...lapping_event_search_other_users_per_user.txt | 2 +- ...epoch_overlapping_event_searches_per_user.txt | 4 ++-- .../epoch_summer_events_per_type.txt | 2 +- tests/test_plan_refsols/first_order_in_year.txt | 2 +- .../first_order_per_customer.txt | 2 +- .../fsi_best_account_customers_per_state_raw.txt | 4 ++-- ..._best_account_customers_per_state_rewrite.txt | 4 ++-- tests/test_plan_refsols/has_cross_correlated.txt | 2 +- ...health_first_patient_by_coverage_type_raw.txt | 2 +- ...th_first_patient_by_coverage_type_rewrite.txt | 2 +- .../highest_priority_per_year.txt | 2 +- .../lineitem_regional_shipments3.txt | 2 +- tests/test_plan_refsols/many_net_filter_1.txt | 2 +- tests/test_plan_refsols/many_net_filter_10.txt | 2 +- tests/test_plan_refsols/many_net_filter_11.txt | 2 +- tests/test_plan_refsols/many_net_filter_2.txt | 2 +- tests/test_plan_refsols/many_net_filter_3.txt | 2 +- tests/test_plan_refsols/many_net_filter_4.txt | 2 +- tests/test_plan_refsols/many_net_filter_5.txt | 2 +- tests/test_plan_refsols/many_net_filter_6.txt | 2 +- tests/test_plan_refsols/many_net_filter_7.txt | 2 +- tests/test_plan_refsols/many_net_filter_8.txt | 2 +- tests/test_plan_refsols/many_net_filter_9.txt | 2 +- .../month_year_sliding_windows.txt | 2 +- .../multi_partition_access_2.txt | 2 +- .../multi_partition_access_3.txt | 2 +- .../multi_partition_access_6.txt | 2 +- tests/test_plan_refsols/n_orders_first_day.txt | 2 +- tests/test_plan_refsols/nation_best_order.txt | 2 +- .../order_info_per_priority.txt | 2 +- .../orders_versus_first_orders.txt | 2 +- tests/test_plan_refsols/pagerank_a1.txt | 2 +- tests/test_plan_refsols/pagerank_a2.txt | 4 ++-- tests/test_plan_refsols/pagerank_a6.txt | 12 ++++++------ tests/test_plan_refsols/pagerank_b3.txt | 6 +++--- tests/test_plan_refsols/pagerank_c4.txt | 8 ++++---- tests/test_plan_refsols/pagerank_d5.txt | 10 +++++----- tests/test_plan_refsols/pagerank_h8.txt | 16 ++++++++-------- 
tests/test_plan_refsols/part_cross_part_a.txt | 4 ++-- tests/test_plan_refsols/part_cross_part_b.txt | 6 +++--- tests/test_plan_refsols/part_cross_part_c.txt | 6 +++--- .../percentile_customers_per_region.txt | 2 +- .../quarter_cum_ir_analysis.txt | 6 +++--- tests/test_plan_refsols/rank_with_filters_c.txt | 2 +- .../region_orders_from_nations_richest.txt | 2 +- .../regional_first_order_best_line_part.txt | 4 ++-- .../regional_suppliers_percentile.txt | 2 +- .../retail_members_agg_best_raw.txt | 2 +- .../retail_members_agg_best_rewrite.txt | 2 +- .../retail_transactions_ts_raw.txt | 4 ++-- .../retail_transactions_ts_rewrite.txt | 4 ++-- .../richest_customer_key_per_region.txt | 2 +- .../richest_customer_per_region.txt | 2 +- tests/test_plan_refsols/simple_cross_10.txt | 2 +- tests/test_plan_refsols/simple_cross_3.txt | 2 +- tests/test_plan_refsols/simple_cross_4.txt | 2 +- tests/test_plan_refsols/simple_cross_5.txt | 4 ++-- tests/test_plan_refsols/simple_cross_6.txt | 2 +- tests/test_plan_refsols/simple_cross_8.txt | 2 +- tests/test_plan_refsols/simplification_4.txt | 2 +- tests/test_plan_refsols/singular3.txt | 2 +- tests/test_plan_refsols/singular4.txt | 2 +- tests/test_plan_refsols/singular5.txt | 2 +- tests/test_plan_refsols/singular6.txt | 2 +- tests/test_plan_refsols/singular7.txt | 2 +- .../sqlite_udf_cumulative_distribution.txt | 2 +- tests/test_plan_refsols/supplier_best_part.txt | 2 +- .../technograph_country_combination_analysis.txt | 4 ++-- .../technograph_hot_purchase_window.txt | 2 +- .../technograph_monthly_incident_rate.txt | 4 ++-- ...r_cumulative_incident_rate_goldcopperstar.txt | 4 ++-- ...aph_year_cumulative_incident_rate_overall.txt | 4 ++-- .../test_plan_refsols/time_threshold_reached.txt | 2 +- tests/test_plan_refsols/top_lineitems_info_2.txt | 2 +- tests/test_plan_refsols/tpch_q19.txt | 2 +- tests/test_plan_refsols/tpch_q2.txt | 2 +- tests/test_plan_refsols/tpch_q21.txt | 4 ++-- tests/test_plan_refsols/tpch_q5.txt | 2 +- 
.../user_range_collection_4.txt | 4 ++-- tests/test_plan_refsols/wealthiest_supplier.txt | 2 +- tests/test_sql_refsols/correl_34_sqlite.sql | 6 +++--- .../cryptbank_filter_count_34_rewrite_sqlite.sql | 15 ++++++++++++++- .../test_sql_refsols/simplification_4_mysql.sql | 4 ++-- .../simplification_4_postgres.sql | 4 ++-- .../test_sql_refsols/simplification_4_sqlite.sql | 4 ++-- 161 files changed, 286 insertions(+), 268 deletions(-) diff --git a/pydough/relational/relational_expressions/call_expression.py b/pydough/relational/relational_expressions/call_expression.py index 3ef2ab4cf..d9d5016ff 100644 --- a/pydough/relational/relational_expressions/call_expression.py +++ b/pydough/relational/relational_expressions/call_expression.py @@ -5,7 +5,7 @@ __all__ = ["CallExpression"] -from pydough.pydough_operators import PyDoughExpressionOperator +from pydough.pydough_operators import BAN, BOR, EQU, NEQ, PyDoughExpressionOperator from pydough.types import PyDoughType from .abstract_expression import RelationalExpression @@ -27,6 +27,11 @@ def __init__( self._op: PyDoughExpressionOperator = op self._inputs: list[RelationalExpression] = inputs + # If the operator is a commutative operation (AND, OR, EQUAL, NOT-EQUAL) + # first sort the inputs to normalize them. 
+ if self.op in (BAN, BOR, EQU, NEQ): + self._inputs.sort(key=repr) + @property def op(self) -> PyDoughExpressionOperator: """ diff --git a/tests/test_plan_refsols/alternative_quarter_cum_ir_analysis.txt b/tests/test_plan_refsols/alternative_quarter_cum_ir_analysis.txt index ba3a3106c..d394600bc 100644 --- a/tests/test_plan_refsols/alternative_quarter_cum_ir_analysis.txt +++ b/tests/test_plan_refsols/alternative_quarter_cum_ir_analysis.txt @@ -7,9 +7,9 @@ ROOT(columns=[('quarter', quarter), ('n_incidents', DEFAULT_TO(ndistinct_in_devi FILTER(condition=pr_name == 'RubyCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) AGGREGATE(keys={'quarter': DATETIME(ca_dt, 'start of quarter':string)}, aggregations={'ndistinct_in_device_id': NDISTINCT(in_device_id)}) - JOIN(condition=t0.in_device_id == t1.de_id & t1.de_product_id == t0.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t0.in_device_id}) + JOIN(condition=t0.in_device_id == t1.de_id & t0.pr_id == t1.de_product_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t0.in_device_id}) JOIN(condition=t0.in_repair_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t0.in_device_id, 'pr_id': t1.pr_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id, 'in_repair_country_id': t1.in_repair_country_id}) + JOIN(condition=DATETIME(t1.in_error_report_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id, 
'in_repair_country_id': t1.in_repair_country_id}) JOIN(condition=t0.ca_dt < DATETIME(t1.pr_release, '+2 years':string, 'start of quarter':string) & t0.ca_dt >= t1.pr_release, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) FILTER(condition=pr_name == 'RubyCopper-Star':string, columns={'pr_release': pr_release}) @@ -23,7 +23,7 @@ ROOT(columns=[('quarter', quarter), ('n_incidents', DEFAULT_TO(ndistinct_in_devi SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) AGGREGATE(keys={'quarter': DATETIME(ca_dt, 'start of quarter':string)}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) JOIN(condition=t0.ca_dt < DATETIME(t1.pr_release, '+2 years':string, 'start of quarter':string) & t0.ca_dt >= t1.pr_release, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) FILTER(condition=pr_name == 'RubyCopper-Star':string, columns={'pr_release': pr_release}) diff --git a/tests/test_plan_refsols/common_prefix_ad.txt b/tests/test_plan_refsols/common_prefix_ad.txt index 2312171f5..bd2f5d00d 100644 --- a/tests/test_plan_refsols/common_prefix_ad.txt +++ b/tests/test_plan_refsols/common_prefix_ad.txt @@ -6,7 +6,7 @@ ROOT(columns=[('supplier_name', anything_s_name), ('part_name', 
anything_p_name) FILTER(condition=n_name == 'JAPAN':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_availqty': t0.ps_availqty}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first, (p_name):asc_last], allow_ties=False), columns={'p_name': p_name, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) FILTER(condition=p_container == 'WRAP CASE':string, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_ag.txt b/tests/test_plan_refsols/common_prefix_ag.txt index 06810301d..37b48fe95 100644 --- a/tests/test_plan_refsols/common_prefix_ag.txt +++ b/tests/test_plan_refsols/common_prefix_ag.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], 
orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_n_nationkey, count_n_nationkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_n_nationkey': COUNT(n_nationkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_nationkey_0': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': 
t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -24,7 +24,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': 
t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ah.txt b/tests/test_plan_refsols/common_prefix_ah.txt index 266aea028..52e26678b 100644 --- a/tests/test_plan_refsols/common_prefix_ah.txt +++ b/tests/test_plan_refsols/common_prefix_ah.txt @@ -14,7 +14,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_high_orders FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey 
== t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ai.txt b/tests/test_plan_refsols/common_prefix_ai.txt index 9ba0f521a..a669357d6 100644 --- a/tests/test_plan_refsols/common_prefix_ai.txt +++ b/tests/test_plan_refsols/common_prefix_ai.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_domestic_lines', sum_n_rows), 
('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_revenue': sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) - AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_n_nationkey, count_n_nationkey != 0:numeric)), 'sum_sum_revenue': SUM(sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_n_nationkey': COUNT(n_nationkey), 'sum_revenue': SUM(l_extendedprice * 1:numeric - l_discount - l_quantity * ps_supplycost)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': 
t1.l_extendedprice, 'l_quantity': t1.l_quantity, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_nationkey_0': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -11,7 +11,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro FILTER(condition=c_mktsegment == 'MACHINERY':string, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': 
t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_aj.txt b/tests/test_plan_refsols/common_prefix_aj.txt index c2046982c..72c372eb8 100644 --- a/tests/test_plan_refsols/common_prefix_aj.txt +++ b/tests/test_plan_refsols/common_prefix_aj.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('total_machine_high_domestic_revenue', ROUND(DEFAULT_TO(sum_sum_sum_revenue, 0:numeric), 2:numeric))], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_sum_revenue': sum_sum_sum_revenue}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) - AGGREGATE(keys={'c_custkey': 
c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_n_nationkey, count_n_nationkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows), 'sum_sum_sum_revenue': SUM(sum_sum_revenue)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_n_nationkey': COUNT(n_nationkey), 'sum_n_rows': SUM(n_rows), 'sum_sum_revenue': SUM(sum_revenue)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_nationkey_0': t0.n_nationkey, 'n_rows': t1.n_rows, 'sum_revenue': t1.sum_revenue}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -24,7 +24,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro 
FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) JOIN(condition=t0.l_partkey == t1.ps_partkey & t0.l_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_quantity': t0.l_quantity, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey, 'ps_supplycost': t1.ps_supplycost}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'l_partkey': t0.l_partkey, 'l_quantity': t0.l_quantity, 'l_suppkey': t0.l_suppkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ak.txt b/tests/test_plan_refsols/common_prefix_ak.txt index 024eece8f..97749cf74 100644 --- a/tests/test_plan_refsols/common_prefix_ak.txt +++ b/tests/test_plan_refsols/common_prefix_ak.txt @@ -1,8 +1,8 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_rows), ('n_machine_high_orders', sum_n_rows), ('n_machine_high_domestic_lines', sum_sum_n_rows)], orderings=[(anything_anything_n_name):asc_first]) FILTER(condition=sum_n_rows != 0:numeric & sum_sum_n_rows != 0:numeric, columns={'anything_anything_n_name': anything_anything_n_name, 'n_rows': n_rows, 'sum_n_rows': sum_n_rows, 'sum_sum_n_rows': sum_sum_n_rows}) - AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_c_custkey, count_c_custkey != 0:numeric)), 'sum_sum_n_rows': SUM(sum_n_rows)}) - AGGREGATE(keys={'c_custkey': c_custkey_0, 'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_c_custkey': COUNT(c_custkey), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.c_custkey == t1.c_custkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'c_custkey_0': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_rows': t1.n_rows}) + AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_anything_n_name': ANYTHING(anything_n_name), 'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_n_nationkey, count_n_nationkey != 0:numeric)), 'sum_sum_n_rows': 
SUM(sum_n_rows)}) + AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey_0}, aggregations={'anything_n_name': ANYTHING(n_name), 'count_n_nationkey': COUNT(n_nationkey), 'sum_n_rows': SUM(n_rows)}) + JOIN(condition=t0.c_custkey == t1.c_custkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t1.n_nationkey, 'n_nationkey_0': t0.n_nationkey, 'n_rows': t1.n_rows}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) @@ -23,7 +23,7 @@ ROOT(columns=[('nation_name', anything_anything_n_name), ('n_machine_cust', n_ro SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '2-HIGH':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'n_nationkey': t0.n_nationkey, 
'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t0.o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/common_prefix_ap.txt b/tests/test_plan_refsols/common_prefix_ap.txt index 43b28f45b..3fc63eb8c 100644 --- a/tests/test_plan_refsols/common_prefix_ap.txt +++ b/tests/test_plan_refsols/common_prefix_ap.txt @@ -3,7 +3,7 @@ ROOT(columns=[('part_name', p_name), ('supplier_name', s_name), ('supplier_quant FILTER(condition=p_brand == 'Brand#32':string & p_size == 10:numeric & CONTAINS(p_name, 'pink':string), columns={'p_name': p_name, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_name': p_name, 'p_partkey': p_partkey, 'p_size': p_size}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 's_name': t1.s_name}) - FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ps_partkey], order=[(ps_availqty):desc_first], allow_ties=False), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 
'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name, 's_name': t0.s_name, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/common_prefix_aq.txt b/tests/test_plan_refsols/common_prefix_aq.txt index 26ca70545..8a1ab562d 100644 --- a/tests/test_plan_refsols/common_prefix_aq.txt +++ b/tests/test_plan_refsols/common_prefix_aq.txt @@ -2,12 +2,12 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('best_supplier' JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 'r_name': t0.r_name, 's_name': t1.s_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t1.s_name}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False) == 1:numeric, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(n_name):asc_last], allow_ties=False), columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t1.ps_availqty, 's_name': t0.s_name, 's_nationkey': t0.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[s_nationkey], order=[(s_acctbal):desc_first], allow_ties=False), columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_availqty': t0.ps_availqty, 'ps_suppkey': t0.ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False) == 1:numeric, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ps_suppkey], order=[(ps_availqty):desc_first], allow_ties=False), columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) SCAN(table=tpch.PART, columns={'p_name': p_name, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/common_prefix_q.txt b/tests/test_plan_refsols/common_prefix_q.txt index 1adcbc62c..20e768eb1 100644 --- a/tests/test_plan_refsols/common_prefix_q.txt +++ b/tests/test_plan_refsols/common_prefix_q.txt @@ -6,7 +6,7 @@ ROOT(columns=[('name', c_name), ('total_spent', DEFAULT_TO(sum_o_totalprice, 0:n FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': 
o_orderkey, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False), columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/common_prefix_r.txt b/tests/test_plan_refsols/common_prefix_r.txt index 1adea801e..fe8e69521 100644 --- a/tests/test_plan_refsols/common_prefix_r.txt +++ b/tests/test_plan_refsols/common_prefix_r.txt @@ -8,7 +8,7 @@ ROOT(columns=[('name', c_name), ('part_name', max_anything_p_name), ('line_price SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': 
o_totalprice}) AGGREGATE(keys={'o_orderkey': o_orderkey}, aggregations={'anything_l_extendedprice': ANYTHING(l_extendedprice), 'anything_p_name': ANYTHING(p_name), 'n_rows': COUNT()}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_extendedprice': t0.l_extendedprice, 'o_orderkey': t0.o_orderkey, 'p_name': t1.p_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(l_extendedprice):desc_first, (l_partkey):asc_last], allow_ties=False), columns={'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_partkey': t1.l_partkey, 'o_custkey': t0.o_custkey, 'o_orderkey': t0.o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric & o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/common_prefix_s.txt b/tests/test_plan_refsols/common_prefix_s.txt index 736fcb4c2..416d8da04 100644 --- a/tests/test_plan_refsols/common_prefix_s.txt +++ b/tests/test_plan_refsols/common_prefix_s.txt @@ -6,7 +6,7 @@ ROOT(columns=[('name', c_name), ('most_recent_order_date', o_orderdate), ('most_ FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, 
type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_rows': t1.n_rows, 'ndistinct_l_suppkey': t1.ndistinct_l_suppkey, 'o_custkey': t0.o_custkey, 'o_orderdate': t0.o_orderdate}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):desc_first, (o_orderkey):asc_last], allow_ties=False), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=YEAR(o_orderdate) == 1998:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=ndistinct_l_suppkey < n_rows, columns={'l_orderkey': l_orderkey, 'n_rows': n_rows, 'ndistinct_l_suppkey': ndistinct_l_suppkey}) diff --git a/tests/test_plan_refsols/correl_16.txt b/tests/test_plan_refsols/correl_16.txt index e9c9aec15..73080e414 100644 --- a/tests/test_plan_refsols/correl_16.txt +++ b/tests/test_plan_refsols/correl_16.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', ndistinct_s_suppkey)], orderings=[]) AGGREGATE(keys={}, aggregations={'ndistinct_s_suppkey': NDISTINCT(s_suppkey)}) - FILTER(condition=s_nationkey == n_nationkey & PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000) == tile, columns={'s_suppkey': s_suppkey}) + FILTER(condition=n_nationkey == s_nationkey & tile == PERCENTILE(args=[], partition=[c_nationkey, s_suppkey], order=[(c_acctbal):asc_last, (c_custkey):asc_last], n_buckets=10000), columns={'s_suppkey': s_suppkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) JOIN(condition=t0.n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t0.n_nationkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey, 'tile': t0.tile}) diff --git a/tests/test_plan_refsols/correl_20.txt b/tests/test_plan_refsols/correl_20.txt index 2ad164655..4247bb14a 100644 --- a/tests/test_plan_refsols/correl_20.txt +++ b/tests/test_plan_refsols/correl_20.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.n_nationkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.n_name == t1.n_name & t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 's_nationkey': t1.s_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t0.n_name, 'o_orderkey': t1.o_orderkey}) diff --git 
a/tests/test_plan_refsols/correl_24.txt b/tests/test_plan_refsols/correl_24.txt index 18e2c272a..4b8622056 100644 --- a/tests/test_plan_refsols/correl_24.txt +++ b/tests/test_plan_refsols/correl_24.txt @@ -1,6 +1,6 @@ ROOT(columns=[('year', year_o_orderdate), ('month', month_o_orderdate), ('n_orders_in_range', n_rows)], orderings=[(year_o_orderdate):asc_first, (month_o_orderdate):asc_first]) AGGREGATE(keys={'month_o_orderdate': month_o_orderdate, 'year_o_orderdate': year_o_orderdate}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.month_o_orderdate == MONTH(t1.o_orderdate) & t0.year_o_orderdate == YEAR(t1.o_orderdate) & MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice) | MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month_o_orderdate': t0.month_o_orderdate, 'year_o_orderdate': t0.year_o_orderdate}) + JOIN(condition=MONTH(t1.o_orderdate) == t0.month_o_orderdate & YEAR(t1.o_orderdate) == t0.year_o_orderdate & MONOTONIC(t0.avg_o_totalprice, t1.o_totalprice, t0.prev_month_avg_price) | MONOTONIC(t0.prev_month_avg_price, t1.o_totalprice, t0.avg_o_totalprice), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month_o_orderdate': t0.month_o_orderdate, 'year_o_orderdate': t0.year_o_orderdate}) PROJECT(columns={'avg_o_totalprice': avg_o_totalprice, 'month_o_orderdate': month_o_orderdate, 'prev_month_avg_price': PREV(args=[avg_o_totalprice], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last]), 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'avg_o_totalprice': AVG(o_totalprice)}) FILTER(condition=YEAR(o_orderdate) < 1994:numeric, columns={'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/correl_25.txt 
b/tests/test_plan_refsols/correl_25.txt index ebf6e34bb..c2a947152 100644 --- a/tests/test_plan_refsols/correl_25.txt +++ b/tests/test_plan_refsols/correl_25.txt @@ -1,6 +1,6 @@ ROOT(columns=[('cust_region_name', anything_r_name), ('cust_region_key', r_regionkey), ('cust_nation_name', anything_n_name), ('cust_nation_key', n_nationkey), ('customer_name', anything_c_name), ('n_urgent_semi_domestic_rail_orders', ndistinct_l_orderkey)], orderings=[(ndistinct_l_orderkey):desc_last, (anything_c_name):asc_first], limit=5:numeric) AGGREGATE(keys={'c_custkey': c_custkey, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_c_name': ANYTHING(c_name), 'anything_n_name': ANYTHING(n_name), 'anything_r_name': ANYTHING(r_name), 'ndistinct_l_orderkey': NDISTINCT(l_orderkey)}) - JOIN(condition=t1.n_name != t0.n_name & t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_name != t1.n_name & t0.l_suppkey == t1.s_suppkey & t0.r_name == t1.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t0.l_orderkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'l_orderkey': t1.l_orderkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, 
columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/correl_26.txt b/tests/test_plan_refsols/correl_26.txt index 59d4c2efd..19bea4282 100644 --- a/tests/test_plan_refsols/correl_26.txt +++ b/tests/test_plan_refsols/correl_26.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': 
t0.n_nationkey}) diff --git a/tests/test_plan_refsols/correl_27.txt b/tests/test_plan_refsols/correl_27.txt index 05490b4f1..0861b3c39 100644 --- a/tests/test_plan_refsols/correl_27.txt +++ b/tests/test_plan_refsols/correl_27.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git 
a/tests/test_plan_refsols/correl_28.txt b/tests/test_plan_refsols/correl_28.txt index 862466ad1..252b90a3a 100644 --- a/tests/test_plan_refsols/correl_28.txt +++ b/tests/test_plan_refsols/correl_28.txt @@ -1,7 +1,7 @@ ROOT(columns=[('nation_name', anything_n_name), ('n_selected_purchases', n_rows)], orderings=[(anything_n_name):asc_first]) JOIN(condition=t0.anything_n_regionkey == t1.r_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'anything_n_name': t0.anything_n_name, 'n_rows': t0.n_rows}) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'anything_n_regionkey': ANYTHING(n_regionkey), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/correl_3.txt 
b/tests/test_plan_refsols/correl_3.txt index 475b9a565..89bf3826e 100644 --- a/tests/test_plan_refsols/correl_3.txt +++ b/tests/test_plan_refsols/correl_3.txt @@ -2,9 +2,9 @@ ROOT(columns=[('region_name', r_name), ('n_nations', DEFAULT_TO(n_rows, 0:numeri JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - FILTER(condition=r_regionkey == anything_n_regionkey, columns={'r_regionkey': r_regionkey}) + FILTER(condition=anything_n_regionkey == r_regionkey, columns={'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_n_regionkey': ANYTHING(n_regionkey)}) - JOIN(condition=SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) == LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=LOWER(SLICE(t0.r_name, None:unknown, 2:numeric, None:unknown)) == SLICE(t1.c_comment, None:unknown, 2:numeric, None:unknown) & t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, 
columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/correl_31.txt b/tests/test_plan_refsols/correl_31.txt index cd7f36ae7..a1a94675d 100644 --- a/tests/test_plan_refsols/correl_31.txt +++ b/tests/test_plan_refsols/correl_31.txt @@ -1,6 +1,6 @@ ROOT(columns=[('nation_name', anything_n_name), ('mean_rev', avg_revenue), ('median_rev', median_revenue)], orderings=[(anything_n_name):asc_first]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'avg_revenue': AVG(l_extendedprice * 1:numeric - l_discount), 'median_revenue': MEDIAN(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git 
a/tests/test_plan_refsols/correl_34.txt b/tests/test_plan_refsols/correl_34.txt index 0a0a1e6ee..5c5dc083e 100644 --- a/tests/test_plan_refsols/correl_34.txt +++ b/tests/test_plan_refsols/correl_34.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) AGGREGATE(keys={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={}) - FILTER(condition=l_orderkey == o_orderkey & ps_partkey == l_partkey & ps_suppkey == l_suppkey & o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]) | RELSIZE(args=[], partition=[l_partkey, l_suppkey], order=[]) == 1:numeric, columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) + FILTER(condition=l_orderkey == o_orderkey & l_partkey == ps_partkey & l_suppkey == ps_suppkey & 1:numeric == RELSIZE(args=[], partition=[l_partkey, l_suppkey], order=[]) | o_totalprice > RELAVG(args=[o_totalprice], partition=[l_linenumber, l_orderkey, ps_partkey, ps_suppkey], order=[]), columns={'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) + JOIN(condition=t0.n_name == t1.n_name & t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_totalprice': t0.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t0.l_partkey, 'l_suppkey': t0.l_suppkey, 'n_name': t0.n_name, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'l_partkey': t1.l_partkey, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/correl_35.txt b/tests/test_plan_refsols/correl_35.txt index af49adab5..dc064536a 100644 --- a/tests/test_plan_refsols/correl_35.txt +++ b/tests/test_plan_refsols/correl_35.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey & t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t0.o_orderpriority == t1.o_orderpriority & t0.p_type == t1.p_type & t0.s_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, 
type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) diff --git a/tests/test_plan_refsols/correl_36.txt b/tests/test_plan_refsols/correl_36.txt index 888773da4..a3085119a 100644 --- a/tests/test_plan_refsols/correl_36.txt +++ b/tests/test_plan_refsols/correl_36.txt @@ -5,7 +5,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key': t0.key, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_partkey': t1.l_partkey, 'p_type': t0.p_type}) JOIN(condition=t0.c_custkey == t1.o_custkey & t0.o_orderpriority == t1.o_orderpriority, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key': t0.o_orderkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t1.o_orderkey, 'p_type': t0.p_type}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t0.c_custkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) - JOIN(condition=t0.o_custkey == t1.c_custkey & t1.c_nationkey == t0.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) + JOIN(condition=t0.o_custkey == t1.c_custkey & t0.s_nationkey == t1.c_nationkey, type=INNER, 
cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_custkey': t1.c_custkey, 'c_nationkey': t1.c_nationkey, 'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_orderkey': t0.o_orderkey, 'o_orderpriority': t0.o_orderpriority, 'p_type': t0.p_type}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'o_custkey': t1.o_custkey, 'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_type': t0.p_type, 's_nationkey': t0.s_nationkey}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'p_type': t0.p_type, 's_nationkey': t1.s_nationkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'p_type': t1.p_type}) diff --git a/tests/test_plan_refsols/count_multiple_filters_e.txt b/tests/test_plan_refsols/count_multiple_filters_e.txt index 3ebaf6592..1052e4ee4 100644 --- a/tests/test_plan_refsols/count_multiple_filters_e.txt +++ b/tests/test_plan_refsols/count_multiple_filters_e.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr), ('n4', sum_expr_11), ('n5', sum_expr_13), ('n6', sum_expr_12)], orderings=[]) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows, 'sum_expr': t0.sum_expr, 'sum_expr_11': t0.sum_expr_11, 'sum_expr_12': t0.sum_expr_12, 'sum_expr_13': t1.sum_expr}) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': 
SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_12': SUM(c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '11':string))}) FILTER(condition=MONOTONIC(500:numeric, c_acctbal, 600:numeric), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(STARTSWITH(c_phone, '11':string))}) diff --git a/tests/test_plan_refsols/count_multiple_filters_f.txt b/tests/test_plan_refsols/count_multiple_filters_f.txt index fed82fccd..32573f77a 100644 --- a/tests/test_plan_refsols/count_multiple_filters_f.txt +++ b/tests/test_plan_refsols/count_multiple_filters_f.txt @@ -1,3 +1,3 @@ ROOT(columns=[('n1', n_rows), ('n2', sum_expr), ('n3', sum_expr_13), ('n4', sum_expr_14), ('n5', sum_expr_15), ('n6', sum_expr_11)], orderings=[]) - AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(STARTSWITH(c_phone, '11':string) & c_mktsegment == 'BUILDING':string)}) + AGGREGATE(keys={}, aggregations={'n_rows': COUNT(), 'sum_expr': SUM(c_mktsegment == 'BUILDING':string), 'sum_expr_11': SUM(c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 600:numeric) & STARTSWITH(c_phone, '11':string)), 'sum_expr_13': SUM(MONOTONIC(500:numeric, c_acctbal, 600:numeric)), 'sum_expr_14': SUM(STARTSWITH(c_phone, '11':string)), 'sum_expr_15': SUM(c_mktsegment == 'BUILDING':string & 
STARTSWITH(c_phone, '11':string))}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/count_multiple_filters_g.txt b/tests/test_plan_refsols/count_multiple_filters_g.txt index b3d5f2b80..a8dc7ecb6 100644 --- a/tests/test_plan_refsols/count_multiple_filters_g.txt +++ b/tests/test_plan_refsols/count_multiple_filters_g.txt @@ -4,10 +4,10 @@ ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr_9), ('n4', agg_3), JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t0.agg_1, 'n_rows': t0.n_rows, 'sum_expr': t1.sum_expr, 'sum_expr_9': t1.sum_expr_9}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'agg_1': t1.n_rows, 'n_rows': t0.n_rows}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + FILTER(condition=100:numeric == PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]), columns={}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + FILTER(condition=100:numeric == PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]), columns={}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'GERMANY':string, columns={'n_nationkey': n_nationkey}) @@ -15,16 +15,16 @@ ROOT(columns=[('n1', n_rows), ('n2', agg_1), ('n3', sum_expr_9), ('n4', agg_3), AGGREGATE(keys={}, aggregations={'sum_expr': SUM(n_name == 'CHINA':string), 
'sum_expr_9': SUM(n_name == 'GERMANY':string)}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_nationkey': c_nationkey}) - FILTER(condition=n_name == 'GERMANY':string | n_name == 'CHINA':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) + FILTER(condition=n_name == 'CHINA':string | n_name == 'GERMANY':string, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + FILTER(condition=100:numeric == PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]), columns={}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) FILTER(condition=n_name == 'CHINA':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=n_name == 'CHINA':string & PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]) == 100:numeric, columns={}) + FILTER(condition=n_name == 'CHINA':string & 100:numeric == PERCENTILE(args=[], partition=[], order=[(c_acctbal):asc_last]), columns={}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'c_acctbal': t0.c_acctbal, 'n_name': t1.n_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_nationkey': c_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) diff --git 
a/tests/test_plan_refsols/count_multiple_filters_n.txt b/tests/test_plan_refsols/count_multiple_filters_n.txt index 6de60fbac..a8952883c 100644 --- a/tests/test_plan_refsols/count_multiple_filters_n.txt +++ b/tests/test_plan_refsols/count_multiple_filters_n.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', sum_sum_expr), ('n4', DEFAULT_TO(sum_sum_expr_17, 0:numeric))], orderings=[]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_16 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + FILTER(condition=sum_sum_expr_16 != 0:numeric & sum_sum_expr_18 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_o.txt 
b/tests/test_plan_refsols/count_multiple_filters_o.txt index ff294f68f..dfe65445d 100644 --- a/tests/test_plan_refsols/count_multiple_filters_o.txt +++ b/tests/test_plan_refsols/count_multiple_filters_o.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', DEFAULT_TO(sum_sum_expr, 0:numeric)), ('n4', sum_sum_expr_17)], orderings=[]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_17 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + FILTER(condition=sum_sum_expr_17 != 0:numeric & sum_sum_expr_18 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_p.txt b/tests/test_plan_refsols/count_multiple_filters_p.txt index 
5435e2d1c..6f1ac43ba 100644 --- a/tests/test_plan_refsols/count_multiple_filters_p.txt +++ b/tests/test_plan_refsols/count_multiple_filters_p.txt @@ -1,7 +1,7 @@ ROOT(columns=[('region_name', r_name), ('n1', n_rows), ('n2', sum_sum_expr_18), ('n3', sum_sum_expr), ('n4', sum_sum_expr_17)], orderings=[]) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name, 'sum_sum_expr': t1.sum_sum_expr, 'sum_sum_expr_17': t1.sum_sum_expr_17, 'sum_sum_expr_18': t1.sum_sum_expr_18}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=sum_sum_expr_18 != 0:numeric & sum_sum_expr_16 != 0:numeric & sum_sum_expr_17 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) + FILTER(condition=sum_sum_expr_16 != 0:numeric & sum_sum_expr_17 != 0:numeric & sum_sum_expr_18 != 0:numeric, columns={'n_regionkey': n_regionkey, 'n_rows': n_rows, 'sum_sum_expr': sum_sum_expr_16, 'sum_sum_expr_17': sum_sum_expr_17, 'sum_sum_expr_18': sum_sum_expr_18}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'n_rows': COUNT(), 'sum_sum_expr_16': SUM(sum_expr), 'sum_sum_expr_17': SUM(sum_expr_17), 'sum_sum_expr_18': SUM(sum_expr_18)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'sum_expr': t1.sum_expr, 'sum_expr_17': t1.sum_expr_17, 'sum_expr_18': t1.sum_expr_18}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) diff --git a/tests/test_plan_refsols/count_multiple_filters_v.txt b/tests/test_plan_refsols/count_multiple_filters_v.txt index a16d155c4..0968a0a76 
100644 --- a/tests/test_plan_refsols/count_multiple_filters_v.txt +++ b/tests/test_plan_refsols/count_multiple_filters_v.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n2', sum_expr), ('n3', sum_expr_4)], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr': SUM(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)), 'sum_expr_4': SUM(NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)))}) - FILTER(condition=c_mktsegment == 'BUILDING':string | NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) & c_mktsegment == 'BUILDING':string & MONOTONIC(500:numeric, c_acctbal, 1000:numeric) | NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) & c_mktsegment == 'BUILDING':string, columns={'c_acctbal': c_acctbal}) + FILTER(condition=c_mktsegment == 'BUILDING':string & NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) | c_mktsegment == 'BUILDING':string & c_mktsegment == 'BUILDING':string & NOT(MONOTONIC(500:numeric, c_acctbal, 1000:numeric)) | MONOTONIC(500:numeric, c_acctbal, 1000:numeric), columns={'c_acctbal': c_acctbal}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_mktsegment': c_mktsegment}) diff --git a/tests/test_plan_refsols/count_multiple_filters_w.txt b/tests/test_plan_refsols/count_multiple_filters_w.txt index 3860b0b1f..ee63a3768 100644 --- a/tests/test_plan_refsols/count_multiple_filters_w.txt +++ b/tests/test_plan_refsols/count_multiple_filters_w.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_14), ('n3', sum_expr_15), ('n4', sum_expr_16), ('n5', sum_expr_17), ('n6', sum_expr_12)], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(STARTSWITH(c_phone, '30':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_12': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string), 'sum_expr_14': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_15': SUM(STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string), 'sum_expr_16': SUM(STARTSWITH(c_phone, '30':string) & 
c_mktsegment == 'HOUSEHOLD':string), 'sum_expr_17': SUM(STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string)}) - FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string) | STARTSWITH(c_phone, '30':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '31':string) & c_mktsegment == 'HOUSEHOLD':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'BUILDING':string | STARTSWITH(c_phone, '32':string) & c_mktsegment == 'HOUSEHOLD':string, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string)), 'sum_expr_12': SUM(c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '32':string)), 'sum_expr_14': SUM(c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '31':string)), 'sum_expr_15': SUM(c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '32':string)), 'sum_expr_16': SUM(c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '30':string)), 'sum_expr_17': SUM(c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '31':string))}) + FILTER(condition=c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '30':string) | c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '31':string) | c_mktsegment == 'BUILDING':string & STARTSWITH(c_phone, '32':string) | c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '30':string) | c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '31':string) | c_mktsegment == 'HOUSEHOLD':string & STARTSWITH(c_phone, '32':string), columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) SCAN(table=tpch.CUSTOMER, columns={'c_mktsegment': c_mktsegment, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/country_x_year_analysis.txt b/tests/test_plan_refsols/country_x_year_analysis.txt index ccc44a169..4b5efe3be 100644 --- 
a/tests/test_plan_refsols/country_x_year_analysis.txt +++ b/tests/test_plan_refsols/country_x_year_analysis.txt @@ -3,7 +3,7 @@ ROOT(columns=[('country_name', co_name), ('start_of_year', start_of_year), ('n_p FILTER(condition=NOT(CONTAINS(co_name, 'C':string)), columns={'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_name': co_name}) AGGREGATE(keys={'co_name': co_name, 'start_of_year': DATETIME(ca_dt, 'start of year':string)}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.co_name == t1.co_name & t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name, 'n_rows': t1.n_rows}) + JOIN(condition=t0.ca_dt == t1.ca_dt & t0.co_name == t1.co_name, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name, 'n_rows': t1.n_rows}) JOIN(condition=t1.ca_dt < DATETIME(t0.pr_release, '+2 years':string) & t1.ca_dt >= t0.pr_release, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t1.ca_dt, 'co_name': t0.co_name}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'pr_release': t1.pr_release}) FILTER(condition=NOT(CONTAINS(co_name, 'C':string)), columns={'co_name': co_name}) @@ -12,9 +12,9 @@ ROOT(columns=[('country_name', co_name), ('start_of_year', start_of_year), ('n_p SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt, 'co_name': co_name}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.de_purchase_country_id == t1.co_id & t1.co_name == t0.co_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name}) + JOIN(condition=t0.co_name == t1.co_name & t0.de_purchase_country_id == 
t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name, 'de_purchase_country_id': t0.de_purchase_country_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name, 'de_product_id': t1.de_product_id, 'de_purchase_country_id': t1.de_purchase_country_id}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'co_name': t0.co_name, 'de_product_id': t1.de_product_id, 'de_purchase_country_id': t1.de_purchase_country_id}) JOIN(condition=t1.ca_dt < DATETIME(t0.pr_release, '+2 years':string) & t1.ca_dt >= t0.pr_release, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t1.ca_dt, 'co_name': t0.co_name}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_name': t0.co_name, 'pr_release': t1.pr_release}) FILTER(condition=NOT(CONTAINS(co_name, 'C':string)), columns={'co_name': co_name}) diff --git a/tests/test_plan_refsols/cryptbank_agg_03_raw.txt b/tests/test_plan_refsols/cryptbank_agg_03_raw.txt index 64fa1e68e..46e3a6ece 100644 --- a/tests/test_plan_refsols/cryptbank_agg_03_raw.txt +++ b/tests/test_plan_refsols/cryptbank_agg_03_raw.txt @@ -1,5 +1,5 @@ ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('balance', UNMASK::(SQRT([a_balance]))), ('name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))))], orderings=[]) - 
FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False), columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_agg_03_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_03_rewrite.txt index 64fa1e68e..46e3a6ece 100644 --- a/tests/test_plan_refsols/cryptbank_agg_03_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_03_rewrite.txt @@ -1,5 +1,5 @@ ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('balance', UNMASK::(SQRT([a_balance]))), ('name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([c_fname])), UNMASK::(LOWER([c_lname]))))], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], 
order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False), columns={'a_balance': a_balance, 'a_type': a_type, 'c_fname': c_fname, 'c_lname': c_lname}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'a_balance': t0.a_balance, 'a_type': t0.a_type, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_type': a_type}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_agg_07_raw.txt b/tests/test_plan_refsols/cryptbank_agg_07_raw.txt index e5ad88ef3..036b6e250 100644 --- a/tests/test_plan_refsols/cryptbank_agg_07_raw.txt +++ b/tests/test_plan_refsols/cryptbank_agg_07_raw.txt @@ -1,3 +1,3 @@ ROOT(columns=[('n_yr', DEFAULT_TO(sum_expr_31, 0:numeric)), ('n_qu', DEFAULT_TO(sum_expr_28, 0:numeric)), ('n_mo', DEFAULT_TO(sum_expr_26, 0:numeric)), ('n_we', DEFAULT_TO(sum_expr_30, 0:numeric)), ('n_da', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_ho', DEFAULT_TO(sum_expr_23, 0:numeric)), ('n_mi', DEFAULT_TO(sum_expr_25, 0:numeric)), ('n_se', DEFAULT_TO(sum_expr_29, 0:numeric)), ('n_cts', DEFAULT_TO(sum_expr_38, 0:numeric)), ('n_dts', DEFAULT_TO(sum_expr_22, 0:numeric)), ('n_nst', DEFAULT_TO(sum_expr_27, 0:numeric)), ('n_ayr', DEFAULT_TO(sum_expr_37, 
0:numeric)), ('n_aqu', DEFAULT_TO(sum_expr_34, 0:numeric)), ('n_amo', DEFAULT_TO(sum_expr_33, 0:numeric)), ('n_awe', DEFAULT_TO(sum_expr_36, 0:numeric)), ('n_ada', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aho', DEFAULT_TO(sum_expr_21, 0:numeric)), ('n_ami', DEFAULT_TO(sum_expr_32, 0:numeric)), ('n_ase', DEFAULT_TO(sum_expr_35, 0:numeric)), ('n_ldm', DEFAULT_TO(sum_expr_24, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10 DAYS':string) == '2019-11-21 18:00:52':string), 'sum_expr_21': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1000 hour':string) == '2019-12-23 10:00:52':string), 'sum_expr_22': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string))), 'sum_expr_23': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of hour':string) == '2023-06-02 04:00:00':string), 'sum_expr_24': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string, '-1 day':string) == '2019-10-31':string), 'sum_expr_25': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of minute':string) == '2023-06-02 04:55:00':string), 'sum_expr_26': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string) == '2023-06-01':string), 'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of quarter':string) == '2023-04-01':string), 'sum_expr_29': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of second':string) == '2023-06-02 04:55:31':string), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of year':string) == 
'2023-01-01':string), 'sum_expr_32': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10000 minute':string) == '2019-11-18 16:40:52':string), 'sum_expr_33': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-5 Mm':string) == '2019-06-11 18:00:52':string), 'sum_expr_34': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+2 q':string) == '2020-05-11 18:00:52':string), 'sum_expr_35': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-1000000 s':string) == '2019-10-31 04:14:12':string), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_37': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1 Y':string) == '2020-11-11 18:00:52':string), 'sum_expr_38': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME('now':string, 'start of day':string)), 'sum_expr_39': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string) == '2023-06-02':string)}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10 DAYS':string) == '2019-11-21 18:00:52':string), 'sum_expr_21': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1000 hour':string) == '2019-12-23 10:00:52':string), 'sum_expr_22': SUM(DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string)) == UNMASK::(DATETIME([t_ts], '+54321 seconds'))), 'sum_expr_23': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of hour':string) == '2023-06-02 04:00:00':string), 'sum_expr_24': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string, '-1 day':string) == '2019-10-31':string), 'sum_expr_25': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of minute':string) == '2023-06-02 04:55:00':string), 'sum_expr_26': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of month':string) == '2023-06-01':string), 
'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of quarter':string) == '2023-04-01':string), 'sum_expr_29': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of second':string) == '2023-06-02 04:55:31':string), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of year':string) == '2023-01-01':string), 'sum_expr_32': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+10000 minute':string) == '2019-11-18 16:40:52':string), 'sum_expr_33': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-5 Mm':string) == '2019-06-11 18:00:52':string), 'sum_expr_34': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+2 q':string) == '2020-05-11 18:00:52':string), 'sum_expr_35': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '-1000000 s':string) == '2019-10-31 04:14:12':string), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_37': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), '+1 Y':string) == '2020-11-11 18:00:52':string), 'sum_expr_38': SUM(DATETIME('now':string, 'start of day':string) == UNMASK::(DATETIME([t_ts], '+54321 seconds'))), 'sum_expr_39': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string) == '2023-06-02':string)}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt b/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt index ed2497185..7a4166d47 100644 --- a/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_agg_07_rewrite.txt @@ -1,3 +1,3 @@ 
ROOT(columns=[('n_yr', DEFAULT_TO(sum_expr_31, 0:numeric)), ('n_qu', DEFAULT_TO(sum_expr_28, 0:numeric)), ('n_mo', DEFAULT_TO(sum_expr_26, 0:numeric)), ('n_we', DEFAULT_TO(sum_expr_30, 0:numeric)), ('n_da', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_ho', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_mi', DEFAULT_TO(sum_expr_39, 0:numeric)), ('n_se', DEFAULT_TO(sum_expr_29, 0:numeric)), ('n_cts', DEFAULT_TO(sum_expr_38, 0:numeric)), ('n_dts', DEFAULT_TO(sum_expr_22, 0:numeric)), ('n_nst', DEFAULT_TO(sum_expr_27, 0:numeric)), ('n_ayr', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aqu', DEFAULT_TO(sum_expr, 0:numeric)), ('n_amo', DEFAULT_TO(sum_expr, 0:numeric)), ('n_awe', DEFAULT_TO(sum_expr_36, 0:numeric)), ('n_ada', DEFAULT_TO(sum_expr, 0:numeric)), ('n_aho', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ami', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ase', DEFAULT_TO(sum_expr, 0:numeric)), ('n_ldm', DEFAULT_TO(sum_expr_24, 0:numeric))], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(t_ts == '2019-11-11 02:55:31':unknown), 'sum_expr_22': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string))), 'sum_expr_24': SUM(ISIN(t_ts, ['2019-11-02 11:58:37', '2019-11-02 12:54:09', '2019-11-11 02:55:31', '2019-11-11 15:44:22']:array[unknown])), 'sum_expr_26': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(ISIN(t_ts, ['2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', 
'2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_29': SUM(t_ts == '2023-06-01 13:50:10':unknown), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(ISIN(t_ts, ['2022-12-31 17:42:54', '2023-01-04 12:05:15', '2023-01-07 22:11:27', '2023-01-20 04:38:03', '2023-01-20 16:40:54', '2023-01-27 15:13:18', '2023-01-30 19:58:26', '2023-02-02 19:12:58', '2023-02-11 11:13:53', '2023-02-11 12:32:55', '2023-02-15 21:54:29', '2023-02-16 14:18:36', '2023-02-28 07:11:29', '2023-03-07 01:26:10', '2023-03-08 18:58:18', '2023-03-14 14:23:33', '2023-03-16 06:17:44', '2023-03-17 08:48:16', '2023-03-24 03:33:40', '2023-03-26 06:52:52', '2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20', '2023-06-30 15:27:03', '2023-07-07 15:17:47', '2023-07-17 03:23:15', '2023-07-18 14:41:26', '2023-08-03 20:24:35', '2023-08-11 20:25:39', '2023-08-29 03:07:18', '2023-09-01 16:50:48', '2023-09-08 09:30:23', '2023-09-13 06:42:39', '2023-09-15 09:00:02', '2023-09-30 08:57:30', '2023-10-15 02:47:04', '2023-10-19 09:40:06', '2023-10-30 00:20:45', '2023-11-08 12:52:24', '2023-11-10 17:20:29', '2023-11-16 11:30:24', '2023-11-21 15:17:10', '2023-11-28 06:34:03', '2023-12-07 14:11:33', '2023-12-15 05:57:23', '2023-12-16 00:51:23', '2023-12-23 07:54:22']:array[unknown])), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_38': SUM(UNMASK::(DATETIME([t_ts], '+54321 seconds')) == DATETIME('now':string, 
'start of day':string)), 'sum_expr_39': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14']:array[unknown]))}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(t_ts == '2019-11-11 02:55:31':unknown), 'sum_expr_22': SUM(DATETIME(JOIN_STRINGS('-':string, '2025':string, '12':string, '31':string)) == UNMASK::(DATETIME([t_ts], '+54321 seconds'))), 'sum_expr_24': SUM(ISIN(t_ts, ['2019-11-02 11:58:37', '2019-11-02 12:54:09', '2019-11-11 02:55:31', '2019-11-11 15:44:22']:array[unknown])), 'sum_expr_26': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_27': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string, '+3 days':string) == '2023-05-31':string), 'sum_expr_28': SUM(ISIN(t_ts, ['2023-04-18 00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20']:array[unknown])), 'sum_expr_29': SUM(t_ts == '2023-06-01 13:50:10':unknown), 'sum_expr_30': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of week':string) == '2023-05-28':string), 'sum_expr_31': SUM(ISIN(t_ts, ['2022-12-31 17:42:54', '2023-01-04 12:05:15', '2023-01-07 22:11:27', '2023-01-20 04:38:03', '2023-01-20 16:40:54', '2023-01-27 15:13:18', '2023-01-30 19:58:26', '2023-02-02 19:12:58', '2023-02-11 11:13:53', '2023-02-11 12:32:55', '2023-02-15 21:54:29', '2023-02-16 14:18:36', '2023-02-28 07:11:29', '2023-03-07 01:26:10', '2023-03-08 18:58:18', '2023-03-14 14:23:33', '2023-03-16 06:17:44', '2023-03-17 08:48:16', '2023-03-24 03:33:40', '2023-03-26 06:52:52', '2023-04-18 
00:35:40', '2023-04-25 18:54:26', '2023-04-29 04:58:30', '2023-05-04 23:30:10', '2023-05-12 04:42:28', '2023-05-17 18:54:12', '2023-05-19 10:10:44', '2023-05-21 13:52:14', '2023-05-24 03:51:10', '2023-06-01 13:50:10', '2023-06-01 13:50:14', '2023-06-04 10:35:26', '2023-06-11 21:53:04', '2023-06-25 15:06:06', '2023-06-25 21:58:37', '2023-06-27 03:21:19', '2023-06-27 10:34:20', '2023-06-30 15:27:03', '2023-07-07 15:17:47', '2023-07-17 03:23:15', '2023-07-18 14:41:26', '2023-08-03 20:24:35', '2023-08-11 20:25:39', '2023-08-29 03:07:18', '2023-09-01 16:50:48', '2023-09-08 09:30:23', '2023-09-13 06:42:39', '2023-09-15 09:00:02', '2023-09-30 08:57:30', '2023-10-15 02:47:04', '2023-10-19 09:40:06', '2023-10-30 00:20:45', '2023-11-08 12:52:24', '2023-11-10 17:20:29', '2023-11-16 11:30:24', '2023-11-21 15:17:10', '2023-11-28 06:34:03', '2023-12-07 14:11:33', '2023-12-15 05:57:23', '2023-12-16 00:51:23', '2023-12-23 07:54:22']:array[unknown])), 'sum_expr_36': SUM(DATETIME(UNMASK::(DATETIME([t_ts], '+54321 seconds')), 'start of day':string, '+1 week':string) == '2023-06-09':string), 'sum_expr_38': SUM(DATETIME('now':string, 'start of day':string) == UNMASK::(DATETIME([t_ts], '+54321 seconds'))), 'sum_expr_39': SUM(ISIN(t_ts, ['2023-06-01 13:50:10', '2023-06-01 13:50:14']:array[unknown]))}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_01_raw.txt b/tests/test_plan_refsols/cryptbank_analysis_01_raw.txt index 8cdd45404..71a7223ea 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_01_raw.txt @@ -2,8 +2,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) 
SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': 
a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_01_rewrite.txt index 8cdd45404..71a7223ea 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_01_rewrite.txt @@ -2,8 +2,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 
END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_02_raw.txt b/tests/test_plan_refsols/cryptbank_analysis_02_raw.txt index 46f08ce9d..b319dbabf 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_02_raw.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_02_raw.txt @@ -2,8 +2,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 
1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_02_rewrite.txt index 46f08ce9d..b319dbabf 100644 --- 
a/tests/test_plan_refsols/cryptbank_analysis_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_02_rewrite.txt @@ -2,8 +2,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE 
WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_03_raw.txt b/tests/test_plan_refsols/cryptbank_analysis_03_raw.txt index affdaba9e..c55676390 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_03_raw.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_03_raw.txt @@ -3,8 +3,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) @@ -13,8 +13,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 
seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_03_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_03_rewrite.txt index 
affdaba9e..c55676390 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_03_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_03_rewrite.txt @@ -3,8 +3,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_fname': t0.c_fname, 'c_key': t0.c_key, 'c_lname': t0.c_lname, 'sum_unmask_t_amount': t1.sum_unmask_t_amount}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_sourceaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_sourceaccount': t0.t_sourceaccount, 't_ts': 
t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) @@ -13,8 +13,8 @@ ROOT(columns=[('key', UNMASK::((42 - ([c_key])))), ('name', JOIN_STRINGS(' ':str FILTER(condition=SLICE(b_addr, -5:numeric, None:unknown, None:unknown) == '94105':string, columns={'b_key': b_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) AGGREGATE(keys={'a_custkey': a_custkey}, aggregations={'sum_unmask_t_amount': SUM(UNMASK::((1025.67 - ([t_amount]))))}) - FILTER(condition=RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False) == 1:numeric, columns={'a_custkey': a_custkey, 't_amount': t_amount}) - JOIN(condition=t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_destaccount], order=[(UNMASK::(DATETIME([t_ts], '+54321 seconds'))):asc_last], allow_ties=False), columns={'a_custkey': a_custkey, 't_amount': t_amount}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 
THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t0.t_amount, 't_destaccount': t0.t_destaccount, 't_ts': t0.t_ts}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_destaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_custkey': t0.a_custkey, 't_amount': t1.t_amount, 't_destaccount': t1.t_destaccount, 't_sourceaccount': t1.t_sourceaccount, 't_ts': t1.t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_amount': t_amount, 't_destaccount': t_destaccount, 't_sourceaccount': t_sourceaccount, 't_ts': t_ts}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt b/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt index 9f348a995..e88dbcf23 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_04_raw.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, UNMASK::(LOWER([anything_c_fname])), UNMASK::(LOWER([anything_c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)):asc_first]) AGGREGATE(keys={'t_sourceaccount': 
t_sourceaccount}, aggregations={'anything_a_key': ANYTHING(a_key), 'anything_c_fname': ANYTHING(c_fname), 'anything_c_lname': ANYTHING(c_lname), 'n_rows': COUNT()}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 't_sourceaccount': t1.t_sourceaccount}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) FILTER(condition=MONOTONIC(1980:numeric, YEAR(UNMASK::(DATE([c_birthday], '+472 days'))), 1985:numeric), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt index 2d3442760..be0f583e4 100644 --- a/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_analysis_04_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)), ('cust_name', JOIN_STRINGS(' ':string, 
UNMASK::(LOWER([anything_c_fname])), UNMASK::(LOWER([anything_c_lname])))), ('n_trans', n_rows)], orderings=[(UNMASK::(CASE WHEN [anything_a_key] = 0 THEN 0 ELSE (CASE WHEN [anything_a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([anything_a_key], 1 + INSTR([anything_a_key], '-'), LENGTH([anything_a_key]) / 2) AS INTEGER) END)):asc_first]) AGGREGATE(keys={'t_sourceaccount': t_sourceaccount}, aggregations={'anything_a_key': ANYTHING(a_key), 'anything_c_fname': ANYTHING(c_fname), 'anything_c_lname': ANYTHING(c_lname), 'n_rows': COUNT()}) JOIN(condition=UNMASK::(CASE WHEN [t0.a_key] = 0 THEN 0 ELSE (CASE WHEN [t0.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t0.a_key], 1 + INSTR([t0.a_key], '-'), LENGTH([t0.a_key]) / 2) AS INTEGER) END) == t1.t_sourceaccount, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'a_key': t0.a_key, 'c_fname': t0.c_fname, 'c_lname': t0.c_lname, 't_sourceaccount': t1.t_sourceaccount}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key, 'c_fname': t1.c_fname, 'c_lname': t1.c_lname}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) FILTER(condition=ISIN(c_birthday, ['1980-01-18', '1981-07-21', '1981-11-15', '1982-11-07', '1983-12-27']:array[unknown]), columns={'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday, 'c_fname': c_fname, 'c_key': c_key, 'c_lname': c_lname}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_11_raw.txt index 6fc64fe4b..b18a51857 100644 --- 
a/tests/test_plan_refsols/cryptbank_filter_count_11_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_11_raw.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) FILTER(condition=UNMASK::(LOWER([c_fname])) == 'alice':string, columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt index 02a4823f6..6d2edcb2d 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_11_rewrite.txt @@ -1,8 +1,8 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.t_sourceaccount == 
UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'a_key': t0.a_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_key': a_key}) - FILTER(condition=c_fname == MASK::(UPPER(['alice':string])), columns={'c_key': c_key}) + FILTER(condition=MASK::(UPPER(['alice':string])) == c_fname, columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_fname': c_fname, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_12_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_12_raw.txt index 4ae8380b9..c7e800543 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_12_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_12_raw.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=YEAR(UNMASK::(DATETIME([t0.t_ts], '+54321 seconds'))) == YEAR(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds'))) & t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) 
* CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount & YEAR(UNMASK::(DATETIME([t0.t_ts], '+54321 seconds'))) == YEAR(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds'))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_12_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_12_rewrite.txt index 4ae8380b9..c7e800543 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_12_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_12_rewrite.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=YEAR(UNMASK::(DATETIME([t0.t_ts], '+54321 seconds'))) == YEAR(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds'))) & t0.t_sourceaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_sourceaccount & YEAR(UNMASK::(DATETIME([t0.t_ts], '+54321 seconds'))) == YEAR(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 
seconds'))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_sourceaccount': t_sourceaccount, 't_ts': t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_13_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_13_raw.txt index 88a588c45..97da54027 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_13_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_13_raw.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::(DATETIME([t0.t_ts], '+54321 seconds')) < DATETIME(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds')), '+2 years':string) & t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(DATETIME([t0.t_ts], '+54321 seconds')) < DATETIME(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds')), '+2 years':string) & UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_destaccount': t_destaccount, 't_ts': t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_13_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_13_rewrite.txt index 88a588c45..97da54027 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_13_rewrite.txt +++ 
b/tests/test_plan_refsols/cryptbank_filter_count_13_rewrite.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::(DATETIME([t0.t_ts], '+54321 seconds')) < DATETIME(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds')), '+2 years':string) & t0.t_destaccount == UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::(DATETIME([t0.t_ts], '+54321 seconds')) < DATETIME(UNMASK::(DATETIME([t1.a_open_ts], '+123456789 seconds')), '+2 years':string) & UNMASK::(CASE WHEN [t1.a_key] = 0 THEN 0 ELSE (CASE WHEN [t1.a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([t1.a_key], 1 + INSTR([t1.a_key], '-'), LENGTH([t1.a_key]) / 2) AS INTEGER) END) == t0.t_destaccount, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) SCAN(table=CRBNK.TRANSACTIONS, columns={'t_destaccount': t_destaccount, 't_ts': t_ts}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_key': a_key, 'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt index de414aa9c..a86152882 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_15_rewrite.txt @@ -2,5 +2,5 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey, type=SEMI, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_key': c_key}) - FILTER(condition=a_type == MASK::(SUBSTRING(['retirement':string], 2) || SUBSTRING(['retirement':string], 1, 1)), columns={'a_custkey': a_custkey}) + 
FILTER(condition=MASK::(SUBSTRING(['retirement':string], 2) || SUBSTRING(['retirement':string], 1, 1)) == a_type, columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_custkey': a_custkey, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt index d5cb152e8..7ca31038f 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_24_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=c_birthday == MASK::(DATE(['1991-11-15':string], '-472 days')), columns={}) + FILTER(condition=MASK::(DATE(['1991-11-15':string], '-472 days')) == c_birthday, columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_birthday': c_birthday}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_27_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_27_raw.txt index eec9ddd9e..07c7c5103 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_27_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_27_raw.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & UNMASK::(LOWER([c_lname])) != 'lopez':string & ENDSWITH(UNMASK::(LOWER([c_fname])), 'a':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 's':string) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ENDSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), '5':string), columns={}) + FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ENDSWITH(UNMASK::(REPLACE(REPLACE(REPLACE([c_phone], '9', '*'), '0', '9'), '*', '0')), 
'5':string) | UNMASK::(LOWER([c_lname])) != 'lopez':string & ENDSWITH(UNMASK::(LOWER([c_fname])), 'a':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 'e':string) | ENDSWITH(UNMASK::(LOWER([c_fname])), 's':string) & PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_birthday': c_birthday, 'c_fname': c_fname, 'c_lname': c_lname, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt index 6de5d9627..44a55a436 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_27_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))) & c_lname != 'LOPEZ':unknown & ISIN(c_fname, ['ALICE', 'GRACE', 'LUKE', 'MARIA', 'OLIVIA', 'QUEENIE', 'SOPHIA']:array[unknown]) | ISIN(c_fname, ['JAMES', 'NICHOLAS', 'THOMAS']:array[unknown]) | ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ISIN(c_phone, ['555-091-2345', '555-901-2345']:array[unknown]), columns={}) + FILTER(condition=ABSENT(UNMASK::(DATE([c_birthday], '+472 days'))) & ISIN(c_phone, ['555-091-2345', '555-901-2345']:array[unknown]) | c_lname != 'LOPEZ':unknown & ISIN(c_fname, ['ALICE', 'GRACE', 'LUKE', 'MARIA', 'OLIVIA', 'QUEENIE', 'SOPHIA']:array[unknown]) | ISIN(c_fname, ['JAMES', 'NICHOLAS', 'THOMAS']:array[unknown]) & PRESENT(UNMASK::(SUBSTRING([c_addr], -1) || SUBSTRING([c_addr], 1, LENGTH([c_addr]) - 1))) & PRESENT(UNMASK::(DATE([c_birthday], '+472 days'))), columns={}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_birthday': c_birthday, 'c_fname': 
c_fname, 'c_lname': c_lname, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_28_raw.txt index 3a58fa548..b1bd8c6f1 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_28_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_28_raw.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string | UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'savings':string, columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) - FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string), columns={'c_key': c_key}) + FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string), columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt 
b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt index 319cf85ff..0423adc2f 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_28_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={}) FILTER(condition=YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) < 2020:numeric & UNMASK::(SQRT([a_balance])) >= 5000:numeric & ISIN(a_type, ['avingss', 'etirementr']:array[unknown]), columns={'a_custkey': a_custkey}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_custkey': a_custkey, 'a_open_ts': a_open_ts, 'a_type': a_type}) - FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string), columns={'c_key': c_key}) + FILTER(condition=CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'gmail':string) | CONTAINS(UNMASK::(SUBSTRING([c_email], -1) || SUBSTRING([c_email], 1, LENGTH([c_email]) - 1)), 'outlook':string), columns={'c_key': c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_email': c_email, 'c_key': c_key}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt b/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt index 3ee213c72..11a00e1b5 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_34_raw.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'n_rows': COUNT()}) - FILTER(condition=QUARTER(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == DAY(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), columns={}) + FILTER(condition=DAY(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == QUARTER(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt b/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt index 7aeed4c0f..11a00e1b5 100644 --- a/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_filter_count_34_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=a_open_ts == '2015-05-04 18:01:51':unknown, columns={}) + FILTER(condition=DAY(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == QUARTER(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))), columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_open_ts': a_open_ts}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt index 59649defb..2bc2f7194 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_raw.txt @@ -1,12 +1,12 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == 
SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key & t0.b_key == t1.b_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == 
SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt index 59649defb..2bc2f7194 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_01_rewrite.txt @@ -1,12 +1,12 @@ ROOT(columns=[('branch_key', b_key), ('n_local_cust', n_rows), ('n_local_cust_local_acct', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[]) AGGREGATE(keys={'b_key': b_key}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.b_key == t1.b_key & UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.unmask_c_key & t0.b_key == t1.b_key, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'n_rows': t1.n_rows}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) AGGREGATE(keys={'b_key': b_key, 'unmask_c_key': UNMASK::((42 - ([c_key])))}, aggregations={'n_rows': COUNT()}) - JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t1.a_branchkey == t0.b_key, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) - JOIN(condition=SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) + JOIN(condition=UNMASK::((42 - ([t0.c_key]))) == t1.a_custkey & t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'b_key': t0.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t1.c_addr], -1) || SUBSTRING([t1.c_addr], 1, LENGTH([t1.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == SLICE(t0.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'b_key': t0.b_key, 'c_key': t1.c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt index 2300e583e..753b652d1 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_raw.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', 
n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt index 2300e583e..753b652d1 100644 --- a/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_general_join_02_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.a_custkey == UNMASK::((42 - ([t1.c_key]))) & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) + JOIN(condition=UNMASK::((42 - ([t1.c_key]))) == t0.a_custkey & t0.a_branchkey == t1.b_key, type=SEMI, columns={}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_branchkey': a_branchkey, 'a_custkey': a_custkey}) - JOIN(condition=SLICE(t1.b_addr, -8:numeric, 
-6:numeric, None:unknown) == SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) + JOIN(condition=SLICE(UNMASK::(SUBSTRING([t0.c_addr], -1) || SUBSTRING([t0.c_addr], 1, LENGTH([t0.c_addr]) - 1)), -8:numeric, -6:numeric, None:unknown) == SLICE(t1.b_addr, -8:numeric, -6:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'b_key': t1.b_key, 'c_key': t0.c_key}) SCAN(table=CRBNK.CUSTOMERS, columns={'c_addr': c_addr, 'c_key': c_key}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key}) diff --git a/tests/test_plan_refsols/cryptbank_window_01_raw.txt b/tests/test_plan_refsols/cryptbank_window_01_raw.txt index 32267be3d..fbb660584 100644 --- a/tests/test_plan_refsols/cryptbank_window_01_raw.txt +++ b/tests/test_plan_refsols/cryptbank_window_01_raw.txt @@ -1,3 +1,3 @@ ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('balance', UNMASK::(SQRT([a_balance])))], orderings=[(UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))):asc_first]) - FILTER(condition=RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string | UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'savings':string], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[UNMASK::(SUBSTRING([a_type], 
-1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'retirement':string | UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1)) == 'savings':string], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False), columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt b/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt index 48b7f6c42..e0683cf73 100644 --- a/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_window_01_rewrite.txt @@ -1,3 +1,3 @@ ROOT(columns=[('account_type', UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('balance', UNMASK::(SQRT([a_balance])))], orderings=[(UNMASK::(SUBSTRING([a_type], -1) || SUBSTRING([a_type], 1, LENGTH([a_type]) - 1))):asc_first]) - FILTER(condition=RANKING(args=[], partition=[ISIN(a_type, ['avingss', 'etirementr']:array[unknown])], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False) == 1:numeric, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ISIN(a_type, ['avingss', 'etirementr']:array[unknown])], order=[(UNMASK::(SQRT([a_balance]))):desc_first], allow_ties=False), columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) SCAN(table=CRBNK.ACCOUNTS, columns={'a_balance': a_balance, 'a_key': a_key, 'a_type': a_type}) diff --git a/tests/test_plan_refsols/cryptbank_window_02_raw.txt b/tests/test_plan_refsols/cryptbank_window_02_raw.txt index dfeb1cc2a..aed9fb69b 100644 --- a/tests/test_plan_refsols/cryptbank_window_02_raw.txt +++ 
b/tests/test_plan_refsols/cryptbank_window_02_raw.txt @@ -1,5 +1,5 @@ ROOT(columns=[('branch_name', b_name), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('creation_timestamp', UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))], orderings=[(b_name):asc_first]) - FILTER(condition=RANKING(args=[], partition=[a_branchkey], order=[(YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == 2021:numeric):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False) == 1:numeric, columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[a_branchkey], order=[(YEAR(UNMASK::(DATETIME([a_open_ts], '+123456789 seconds'))) == 2021:numeric):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False), columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'a_branchkey': t1.a_branchkey, 'a_key': t1.a_key, 'a_open_ts': t1.a_open_ts, 'b_name': t0.b_name}) FILTER(condition=CONTAINS(b_addr, ';CA;':string), columns={'b_key': b_key, 'b_name': b_name}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key, 'b_name': b_name}) diff --git a/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt b/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt index eeb528693..5257879f0 100644 --- a/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt +++ b/tests/test_plan_refsols/cryptbank_window_02_rewrite.txt @@ -1,5 
+1,5 @@ ROOT(columns=[('branch_name', b_name), ('key', UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)), ('creation_timestamp', UNMASK::(DATETIME([a_open_ts], '+123456789 seconds')))], orderings=[(b_name):asc_first]) - FILTER(condition=RANKING(args=[], partition=[a_branchkey], order=[(ISIN(a_open_ts, ['2017-02-11 10:59:51', '2017-06-15 12:41:51', '2017-07-07 14:26:51', '2017-07-09 12:21:51', '2017-09-15 11:26:51', '2018-01-02 12:26:51']:array[unknown])):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False) == 1:numeric, columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[a_branchkey], order=[(ISIN(a_open_ts, ['2017-02-11 10:59:51', '2017-06-15 12:41:51', '2017-07-07 14:26:51', '2017-07-09 12:21:51', '2017-09-15 11:26:51', '2018-01-02 12:26:51']:array[unknown])):asc_last, (UNMASK::(CASE WHEN [a_key] = 0 THEN 0 ELSE (CASE WHEN [a_key] > 0 THEN 1 ELSE -1 END) * CAST(SUBSTRING([a_key], 1 + INSTR([a_key], '-'), LENGTH([a_key]) / 2) AS INTEGER) END)):asc_last], allow_ties=False), columns={'a_key': a_key, 'a_open_ts': a_open_ts, 'b_name': b_name}) JOIN(condition=t0.b_key == t1.a_branchkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'a_branchkey': t1.a_branchkey, 'a_key': t1.a_key, 'a_open_ts': t1.a_open_ts, 'b_name': t0.b_name}) FILTER(condition=CONTAINS(b_addr, ';CA;':string), columns={'b_key': b_key, 'b_name': b_name}) SCAN(table=CRBNK.BRANCHES, columns={'b_addr': b_addr, 'b_key': b_key, 'b_name': b_name}) diff --git a/tests/test_plan_refsols/deep_best_analysis.txt b/tests/test_plan_refsols/deep_best_analysis.txt index a9a88b56e..b3916adfc 100644 --- 
a/tests/test_plan_refsols/deep_best_analysis.txt +++ b/tests/test_plan_refsols/deep_best_analysis.txt @@ -1,23 +1,23 @@ ROOT(columns=[('r_name', r_name), ('n_name', n_name), ('c_key', key_5), ('c_bal', c_acctbal), ('cr_bal', account_balance_13), ('s_key', s_suppkey), ('p_key', ps_partkey), ('p_qty', ps_availqty), ('cg_key', c_custkey)], orderings=[(n_name):asc_first], limit=10:numeric) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t1.c_custkey, 'key_5': t0.c_custkey, 'n_name': t0.n_name, 'ps_availqty': t0.ps_availqty, 'ps_partkey': t0.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t0.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': t1.s_suppkey}) - JOIN(condition=t0.r_regionkey == t1.n_regionkey & t0.n_nationkey == t1.n_nationkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t0.account_balance_13, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 'r_name': t0.r_name, 's_suppkey': 
t1.s_suppkey}) + JOIN(condition=t0.n_nationkey == t1.n_nationkey & t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'account_balance_13': t1.c_acctbal, 'c_acctbal': t0.c_acctbal, 'c_custkey': t0.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False), columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], 
order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False), columns={'c_acctbal': c_acctbal, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) - FILTER(condition=n_nationkey == s_nationkey & RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_suppkey': s_suppkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) + FILTER(condition=n_nationkey == s_nationkey & 1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (s_suppkey):asc_last], allow_ties=False), columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_suppkey': s_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(ps_availqty):desc_first, (ps_partkey):asc_last], allow_ties=False), columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey, 'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 'ps_availqty': t1.ps_availqty, 'ps_partkey': t1.ps_partkey, 's_nationkey': t0.s_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'n_regionkey': t0.n_regionkey, 's_nationkey': t1.s_nationkey, 's_suppkey': t1.s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.PARTSUPP, columns={'ps_availqty': ps_availqty, 'ps_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[], order=[(c_acctbal):desc_first, (c_custkey):asc_last], allow_ties=False), columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/double_cross.txt b/tests/test_plan_refsols/double_cross.txt index b714f74e3..432a95b46 100644 --- a/tests/test_plan_refsols/double_cross.txt +++ b/tests/test_plan_refsols/double_cross.txt @@ -1,6 +1,6 @@ ROOT(columns=[('wk', ord_wk), ('n_lines', n_rows), ('n_orders', anything_n_rows), ('lpo', ROUND(RELSUM(args=[n_rows], partition=[], order=[(line_wk):asc_last], cumulative=True) / RELSUM(args=[anything_n_rows], partition=[], order=[(ord_wk):asc_last], cumulative=True), 4:numeric))], orderings=[(ord_wk):asc_first]) AGGREGATE(keys={'line_wk': DATEDIFF('week':string, min_o_orderdate, l_receiptdate), 'ord_wk': ord_wk}, aggregations={'anything_n_rows': ANYTHING(n_rows), 
'n_rows': COUNT()}) - JOIN(condition=DATEDIFF('week':string, t0.min_o_orderdate, t1.l_receiptdate) < 10:numeric & t0.ord_wk == DATEDIFF('week':string, t0.min_o_orderdate, t1.l_receiptdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_o_orderdate': t0.min_o_orderdate, 'n_rows': t0.n_rows, 'ord_wk': t0.ord_wk}) + JOIN(condition=DATEDIFF('week':string, t0.min_o_orderdate, t1.l_receiptdate) < 10:numeric & DATEDIFF('week':string, t0.min_o_orderdate, t1.l_receiptdate) == t0.ord_wk, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_receiptdate': t1.l_receiptdate, 'min_o_orderdate': t0.min_o_orderdate, 'n_rows': t0.n_rows, 'ord_wk': t0.ord_wk}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'min_o_orderdate': t1.min_o_orderdate, 'n_rows': t0.n_rows, 'ord_wk': t0.ord_wk}) AGGREGATE(keys={'ord_wk': DATEDIFF('week':string, min_o_orderdate, o_orderdate)}, aggregations={'n_rows': COUNT()}) JOIN(condition=DATEDIFF('week':string, t0.min_o_orderdate, t1.o_orderdate) < 10:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'min_o_orderdate': t0.min_o_orderdate, 'o_orderdate': t1.o_orderdate}) diff --git a/tests/test_plan_refsols/epoch_culture_events_info.txt b/tests/test_plan_refsols/epoch_culture_events_info.txt index e26cfdcde..ba89164d2 100644 --- a/tests/test_plan_refsols/epoch_culture_events_info.txt +++ b/tests/test_plan_refsols/epoch_culture_events_info.txt @@ -5,7 +5,7 @@ ROOT(columns=[('event_name', ev_name), ('era_name', er_name), ('event_year', YEA FILTER(condition=ev_typ == 'culture':string, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key, 'ev_name': ev_name, 'ev_typ': ev_typ}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': 
er_start_year}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month3 | MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 's_name': t1.s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_key': ev_key}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) JOIN(condition=t1.t_start_hour <= HOUR(t0.ev_dt) & HOUR(t0.ev_dt) < t1.t_end_hour, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_key': t0.ev_key, 't_name': t1.t_name}) diff --git a/tests/test_plan_refsols/epoch_events_per_season.txt b/tests/test_plan_refsols/epoch_events_per_season.txt index 3dbc17b63..a3c7fdc32 100644 --- a/tests/test_plan_refsols/epoch_events_per_season.txt +++ b/tests/test_plan_refsols/epoch_events_per_season.txt @@ -1,5 +1,5 @@ ROOT(columns=[('season_name', s_name), ('n_events', n_rows)], orderings=[(n_rows):desc_last, (s_name):asc_first]) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT()}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month3 | MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt}) diff --git 
a/tests/test_plan_refsols/epoch_first_event_per_era.txt b/tests/test_plan_refsols/epoch_first_event_per_era.txt index a4a1793d0..096eed215 100644 --- a/tests/test_plan_refsols/epoch_first_event_per_era.txt +++ b/tests/test_plan_refsols/epoch_first_event_per_era.txt @@ -1,5 +1,5 @@ ROOT(columns=[('era_name', er_name), ('event_name', ev_name)], orderings=[(er_start_year):asc_first]) - FILTER(condition=RANKING(args=[], partition=[er_name], order=[(ev_dt):asc_last], allow_ties=False) == 1:numeric, columns={'er_name': er_name, 'er_start_year': er_start_year, 'ev_name': ev_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[er_name], order=[(ev_dt):asc_last], allow_ties=False), columns={'er_name': er_name, 'er_start_year': er_start_year, 'ev_name': ev_name}) JOIN(condition=t0.er_start_year <= YEAR(t1.ev_dt) & YEAR(t1.ev_dt) < t0.er_end_year, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'er_name': t0.er_name, 'er_start_year': t0.er_start_year, 'ev_dt': t1.ev_dt, 'ev_name': t1.ev_name}) SCAN(table=ERAS, columns={'er_end_year': er_end_year, 'er_name': er_name, 'er_start_year': er_start_year}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) diff --git a/tests/test_plan_refsols/epoch_intra_season_searches.txt b/tests/test_plan_refsols/epoch_intra_season_searches.txt index 81911c559..d86ba0b83 100644 --- a/tests/test_plan_refsols/epoch_intra_season_searches.txt +++ b/tests/test_plan_refsols/epoch_intra_season_searches.txt @@ -3,20 +3,20 @@ ROOT(columns=[('season_name', s_name), ('pct_season_searches', ROUND(100.0:numer AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(DEFAULT_TO(KEEP_IF(count_search_id, count_search_id != 0:numeric), 0:numeric) > 0:numeric)}) AGGREGATE(keys={'s_name': s_name, 'search_id': search_id_0}, aggregations={'count_search_id': COUNT(search_id)}) JOIN(condition=t0.s_name == t1.s_name & t0.search_id == t1.search_id, type=LEFT, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_id_0': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month3 | MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'s_name': t0.s_name, 'search_id': t1.search_id}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_ts': search_ts}) - JOIN(condition=t1.s_name == t0.s_name & MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) + JOIN(condition=t0.s_name == t1.s_name & MONTH(t0.ev_dt) == t1.s_month3 | MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_id': t0.search_id}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_dt': t1.ev_dt, 's_name': t0.s_name, 'search_id': t0.search_id}) - JOIN(condition=MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2 | MONTH(t1.search_ts) == t0.s_month3, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) + JOIN(condition=MONTH(t1.search_ts) == t0.s_month3 | 
MONTH(t1.search_ts) == t0.s_month1 | MONTH(t1.search_ts) == t0.s_month2, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'s_name': t0.s_name, 'search_id': t1.search_id, 'search_string': t1.search_string}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=SEARCHES, columns={'search_id': search_id, 'search_string': search_string, 'search_ts': search_ts}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) AGGREGATE(keys={'s_name': s_name}, aggregations={'n_rows': COUNT(), 'sum_is_intra_season': SUM(name_9 == s_name)}) - JOIN(condition=MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2 | MONTH(t0.search_ts) == t1.s_month3, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t0.search_ts) == t1.s_month3 | MONTH(t0.search_ts) == t1.s_month1 | MONTH(t0.search_ts) == t1.s_month2, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'name_9': t1.s_name, 's_name': t0.s_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'s_name': t0.s_name, 'search_ts': t1.search_ts}) - JOIN(condition=MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2 | MONTH(t1.ev_dt) == t0.s_month3, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) + JOIN(condition=MONTH(t1.ev_dt) == t0.s_month3 | MONTH(t1.ev_dt) == t0.s_month1 | MONTH(t1.ev_dt) == t0.s_month2, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ev_name': t1.ev_name, 's_name': t0.s_name}) SCAN(table=SEASONS, 
columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_name': ev_name}) SCAN(table=SEARCHES, columns={'search_string': search_string, 'search_ts': search_ts}) diff --git a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt index b0a2597f1..1800f43a3 100644 --- a/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt +++ b/tests/test_plan_refsols/epoch_most_popular_search_engine_per_tod.txt @@ -1,5 +1,5 @@ ROOT(columns=[('tod', t_name), ('search_engine', search_engine), ('n_searches', n_rows)], orderings=[(t_name):asc_first]) - FILTER(condition=RANKING(args=[], partition=[t_name], order=[(n_rows):desc_first, (search_engine):asc_last], allow_ties=False) == 1:numeric, columns={'n_rows': n_rows, 'search_engine': search_engine, 't_name': t_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[t_name], order=[(n_rows):desc_first, (search_engine):asc_last], allow_ties=False), columns={'n_rows': n_rows, 'search_engine': search_engine, 't_name': t_name}) AGGREGATE(keys={'search_engine': search_engine, 't_name': t_name}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.t_start_hour <= HOUR(t1.search_ts) & HOUR(t1.search_ts) < t0.t_end_hour, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_engine': t1.search_engine, 't_name': t0.t_name}) SCAN(table=TIMES, columns={'t_end_hour': t_end_hour, 't_name': t_name, 't_start_hour': t_start_hour}) diff --git a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt index c932a6548..8f47f5c43 100644 --- a/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt +++ b/tests/test_plan_refsols/epoch_most_popular_topic_per_region.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region', user_region), ('event_type', ev_typ), 
('n_searches', ndistinct_search_id)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[user_region], order=[(ndistinct_search_id):desc_first], allow_ties=False) == 1:numeric, columns={'ev_typ': ev_typ, 'ndistinct_search_id': ndistinct_search_id, 'user_region': user_region}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[user_region], order=[(ndistinct_search_id):desc_first], allow_ties=False), columns={'ev_typ': ev_typ, 'ndistinct_search_id': ndistinct_search_id, 'user_region': user_region}) AGGREGATE(keys={'ev_typ': ev_typ, 'user_region': user_region}, aggregations={'ndistinct_search_id': NDISTINCT(search_id)}) JOIN(condition=t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t0.search_id, 'user_region': t1.user_region}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_typ': t0.ev_typ, 'search_id': t1.search_id, 'search_user_id': t1.search_user_id}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt index fe613a411..4c02053c2 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_search_other_users_per_user.txt @@ -1,6 +1,6 @@ ROOT(columns=[('user_name', anything_user_name), ('n_other_users', ndistinct_user_id)], orderings=[(ndistinct_user_id):desc_last, (anything_user_name):asc_first], limit=7:numeric) AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_user_name': ANYTHING(user_name), 'ndistinct_user_id': NDISTINCT(user_id_11)}) - JOIN(condition=t1.user_name != t0.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': 
t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_name != t1.user_name & t0.search_user_id == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'user_id': t0.user_id, 'user_id_11': t1.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'search_string': t1.search_string, 'user_id': t0.user_id, 'user_name': t0.user_name}) diff --git a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt index 589bbc2c0..0cbb8a5cd 100644 --- a/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt +++ b/tests/test_plan_refsols/epoch_overlapping_event_searches_per_user.txt @@ -1,8 +1,8 @@ ROOT(columns=[('user_name', anything_anything_user_name), ('n_searches', n_rows)], orderings=[(n_rows):desc_last, (anything_anything_user_name):asc_first], limit=4:numeric) AGGREGATE(keys={'user_id': user_id}, aggregations={'anything_anything_user_name': ANYTHING(anything_user_name), 'n_rows': COUNT()}) - FILTER(condition=user_id == anything_search_user_id, columns={'anything_user_name': anything_user_name, 'user_id': user_id}) + FILTER(condition=anything_search_user_id == user_id, columns={'anything_user_name': anything_user_name, 'user_id': user_id}) AGGREGATE(keys={'search_id': search_id, 'user_id': user_id}, 
aggregations={'anything_search_user_id': ANYTHING(search_user_id), 'anything_user_name': ANYTHING(user_name)}) - JOIN(condition=t1.user_name != t0.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) + JOIN(condition=t0.user_name != t1.user_name & t0.user_id_8 == t1.user_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t1.search_string), LOWER(t0.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_id_8': t1.search_user_id, 'user_name': t0.user_name}) JOIN(condition=CONTAINS(LOWER(t0.search_string), LOWER(t1.ev_name)), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ev_name': t1.ev_name, 'search_id': t0.search_id, 'search_user_id': t0.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) JOIN(condition=t0.user_id == t1.search_user_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'search_id': t1.search_id, 'search_string': t1.search_string, 'search_user_id': t1.search_user_id, 'user_id': t0.user_id, 'user_name': t0.user_name}) diff --git a/tests/test_plan_refsols/epoch_summer_events_per_type.txt b/tests/test_plan_refsols/epoch_summer_events_per_type.txt index 22db807aa..56f84039f 100644 --- a/tests/test_plan_refsols/epoch_summer_events_per_type.txt +++ b/tests/test_plan_refsols/epoch_summer_events_per_type.txt @@ -1,6 +1,6 @@ ROOT(columns=[('event_type', ev_typ), ('n_events', n_rows)], orderings=[(ev_typ):asc_first]) AGGREGATE(keys={'ev_typ': ev_typ}, aggregations={'n_rows': 
COUNT()}) - JOIN(condition=MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2 | MONTH(t0.ev_dt) == t1.s_month3, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_typ': t0.ev_typ}) + JOIN(condition=MONTH(t0.ev_dt) == t1.s_month3 | MONTH(t0.ev_dt) == t1.s_month1 | MONTH(t0.ev_dt) == t1.s_month2, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'ev_typ': t0.ev_typ}) SCAN(table=EVENTS, columns={'ev_dt': ev_dt, 'ev_typ': ev_typ}) FILTER(condition=s_name == 'Summer':string, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3}) SCAN(table=SEASONS, columns={'s_month1': s_month1, 's_month2': s_month2, 's_month3': s_month3, 's_name': s_name}) diff --git a/tests/test_plan_refsols/first_order_in_year.txt b/tests/test_plan_refsols/first_order_in_year.txt index 77dfb03eb..b3934e100 100644 --- a/tests/test_plan_refsols/first_order_in_year.txt +++ b/tests/test_plan_refsols/first_order_in_year.txt @@ -1,4 +1,4 @@ ROOT(columns=[('order_date', o_orderdate), ('key', o_orderkey)], orderings=[(o_orderdate):asc_first]) - FILTER(condition=ABSENT(PREV(args=[o_orderdate], partition=[], order=[(o_orderdate):asc_last, (o_orderkey):asc_last])) | YEAR(PREV(args=[o_orderdate], partition=[], order=[(o_orderdate):asc_last, (o_orderkey):asc_last])) != YEAR(o_orderdate), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) + FILTER(condition=ABSENT(PREV(args=[o_orderdate], partition=[], order=[(o_orderdate):asc_last, (o_orderkey):asc_last])) | YEAR(o_orderdate) != YEAR(PREV(args=[o_orderdate], partition=[], order=[(o_orderdate):asc_last, (o_orderkey):asc_last])), columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) FILTER(condition=MONTH(o_orderdate) == 1:numeric, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) diff --git 
a/tests/test_plan_refsols/first_order_per_customer.txt b/tests/test_plan_refsols/first_order_per_customer.txt index a445de391..5d56beeba 100644 --- a/tests/test_plan_refsols/first_order_per_customer.txt +++ b/tests/test_plan_refsols/first_order_per_customer.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', c_name), ('first_order_date', o_orderdate), ('first_order JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=c_acctbal >= 9000.0:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/fsi_best_account_customers_per_state_raw.txt b/tests/test_plan_refsols/fsi_best_account_customers_per_state_raw.txt index ef1769562..85dff445f 100644 --- a/tests/test_plan_refsols/fsi_best_account_customers_per_state_raw.txt +++ b/tests/test_plan_refsols/fsi_best_account_customers_per_state_raw.txt @@ -1,7 +1,7 @@ ROOT(columns=[('state', unmask_state), ('balance', balance), ('first_name', UNMASK::(PTY_UNPROTECT([firstname], 'deName'))), ('last_name', UNMASK::(PTY_UNPROTECT([lastname], 'deName')))], orderings=[(unmask_state):asc_first]) - FILTER(condition=RANKING(args=[], partition=[unmask_state], 
order=[(balance):desc_first], allow_ties=False) == 1:numeric, columns={'balance': balance, 'firstname': firstname, 'lastname': lastname, 'unmask_state': unmask_state}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[unmask_state], order=[(balance):desc_first], allow_ties=False), columns={'balance': balance, 'firstname': firstname, 'lastname': lastname, 'unmask_state': unmask_state}) JOIN(condition=t0.customerid == t1.customerid, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'balance': t1.balance, 'firstname': t0.firstname, 'lastname': t0.lastname, 'unmask_state': t0.unmask_state}) - JOIN(condition=t0.unmask_state == UNMASK::(PTY_UNPROTECT([t1.state], 'deAddress')), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customerid': t1.customerid, 'firstname': t1.firstname, 'lastname': t1.lastname, 'unmask_state': t0.unmask_state}) + JOIN(condition=UNMASK::(PTY_UNPROTECT([t1.state], 'deAddress')) == t0.unmask_state, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customerid': t1.customerid, 'firstname': t1.firstname, 'lastname': t1.lastname, 'unmask_state': t0.unmask_state}) LIMIT(limit=5:numeric, columns={'unmask_state': unmask_state}, orderings=[(unmask_state):asc_first]) AGGREGATE(keys={'unmask_state': UNMASK::(PTY_UNPROTECT([state], 'deAddress'))}, aggregations={}) SCAN(table=bodo.fsi.protected_customers, columns={'state': state}) diff --git a/tests/test_plan_refsols/fsi_best_account_customers_per_state_rewrite.txt b/tests/test_plan_refsols/fsi_best_account_customers_per_state_rewrite.txt index ef1769562..85dff445f 100644 --- a/tests/test_plan_refsols/fsi_best_account_customers_per_state_rewrite.txt +++ b/tests/test_plan_refsols/fsi_best_account_customers_per_state_rewrite.txt @@ -1,7 +1,7 @@ ROOT(columns=[('state', unmask_state), ('balance', balance), ('first_name', UNMASK::(PTY_UNPROTECT([firstname], 'deName'))), ('last_name', 
UNMASK::(PTY_UNPROTECT([lastname], 'deName')))], orderings=[(unmask_state):asc_first]) - FILTER(condition=RANKING(args=[], partition=[unmask_state], order=[(balance):desc_first], allow_ties=False) == 1:numeric, columns={'balance': balance, 'firstname': firstname, 'lastname': lastname, 'unmask_state': unmask_state}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[unmask_state], order=[(balance):desc_first], allow_ties=False), columns={'balance': balance, 'firstname': firstname, 'lastname': lastname, 'unmask_state': unmask_state}) JOIN(condition=t0.customerid == t1.customerid, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'balance': t1.balance, 'firstname': t0.firstname, 'lastname': t0.lastname, 'unmask_state': t0.unmask_state}) - JOIN(condition=t0.unmask_state == UNMASK::(PTY_UNPROTECT([t1.state], 'deAddress')), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customerid': t1.customerid, 'firstname': t1.firstname, 'lastname': t1.lastname, 'unmask_state': t0.unmask_state}) + JOIN(condition=UNMASK::(PTY_UNPROTECT([t1.state], 'deAddress')) == t0.unmask_state, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customerid': t1.customerid, 'firstname': t1.firstname, 'lastname': t1.lastname, 'unmask_state': t0.unmask_state}) LIMIT(limit=5:numeric, columns={'unmask_state': unmask_state}, orderings=[(unmask_state):asc_first]) AGGREGATE(keys={'unmask_state': UNMASK::(PTY_UNPROTECT([state], 'deAddress'))}, aggregations={}) SCAN(table=bodo.fsi.protected_customers, columns={'state': state}) diff --git a/tests/test_plan_refsols/has_cross_correlated.txt b/tests/test_plan_refsols/has_cross_correlated.txt index 58277ebde..aaccbf8ca 100644 --- a/tests/test_plan_refsols/has_cross_correlated.txt +++ b/tests/test_plan_refsols/has_cross_correlated.txt @@ -1,5 +1,5 @@ ROOT(columns=[('n', ndistinct_c_custkey)], orderings=[]) AGGREGATE(keys={}, 
aggregations={'ndistinct_c_custkey': NDISTINCT(c_custkey)}) - JOIN(condition=t1.s_nationkey == t0.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) + JOIN(condition=t0.c_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey}) diff --git a/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt b/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt index 6f8a957da..c6978c080 100644 --- a/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt +++ b/tests/test_plan_refsols/health_first_patient_by_coverage_type_raw.txt @@ -4,7 +4,7 @@ ROOT(columns=[('coverage_type', coverage_type), ('first_name', max_anything_unma JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_unmask_date_of_birth': t1.anything_unmask_date_of_birth, 'anything_unmask_first_name': t1.anything_unmask_first_name, 'anything_unmask_last_name': t1.anything_unmask_last_name, 'coverage_type': t0.coverage_type, 'n_rows': t1.n_rows}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) AGGREGATE(keys={'insurance_plan_id': insurance_plan_id}, aggregations={'anything_unmask_date_of_birth': ANYTHING(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), 'anything_unmask_first_name': ANYTHING(UNMASK::(PTY_UNPROTECT_NAME([first_name]))), 'anything_unmask_last_name': ANYTHING(UNMASK::(PTY_UNPROTECT([last_name], 'deName'))), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[coverage_type], order=[(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))):asc_last, 
(UNMASK::(PTY_UNPROTECT_ACCOUNT([patient_id]))):asc_last], allow_ties=False) == 1:numeric, columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[coverage_type], order=[(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))):asc_last, (UNMASK::(PTY_UNPROTECT_ACCOUNT([patient_id]))):asc_last], allow_ties=False), columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name}) JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'coverage_type': t0.coverage_type, 'date_of_birth': t1.date_of_birth, 'first_name': t1.first_name, 'insurance_plan_id': t0.insurance_plan_id, 'last_name': t1.last_name, 'patient_id': t1.patient_id}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) SCAN(table=bodo.health.protected_patients, columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name, 'patient_id': patient_id}) diff --git a/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt b/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt index 6f8a957da..c6978c080 100644 --- a/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt +++ b/tests/test_plan_refsols/health_first_patient_by_coverage_type_rewrite.txt @@ -4,7 +4,7 @@ ROOT(columns=[('coverage_type', coverage_type), ('first_name', max_anything_unma JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'anything_unmask_date_of_birth': t1.anything_unmask_date_of_birth, 'anything_unmask_first_name': t1.anything_unmask_first_name, 
'anything_unmask_last_name': t1.anything_unmask_last_name, 'coverage_type': t0.coverage_type, 'n_rows': t1.n_rows}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) AGGREGATE(keys={'insurance_plan_id': insurance_plan_id}, aggregations={'anything_unmask_date_of_birth': ANYTHING(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))), 'anything_unmask_first_name': ANYTHING(UNMASK::(PTY_UNPROTECT_NAME([first_name]))), 'anything_unmask_last_name': ANYTHING(UNMASK::(PTY_UNPROTECT([last_name], 'deName'))), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[coverage_type], order=[(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))):asc_last, (UNMASK::(PTY_UNPROTECT_ACCOUNT([patient_id]))):asc_last], allow_ties=False) == 1:numeric, columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[coverage_type], order=[(UNMASK::(PTY_UNPROTECT([date_of_birth], 'deDOB'))):asc_last, (UNMASK::(PTY_UNPROTECT_ACCOUNT([patient_id]))):asc_last], allow_ties=False), columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name}) JOIN(condition=t0.insurance_plan_id == t1.insurance_plan_id, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'coverage_type': t0.coverage_type, 'date_of_birth': t1.date_of_birth, 'first_name': t1.first_name, 'insurance_plan_id': t0.insurance_plan_id, 'last_name': t1.last_name, 'patient_id': t1.patient_id}) SCAN(table=bodo.health.insurance_plans, columns={'coverage_type': coverage_type, 'insurance_plan_id': insurance_plan_id}) SCAN(table=bodo.health.protected_patients, columns={'date_of_birth': date_of_birth, 'first_name': first_name, 'insurance_plan_id': insurance_plan_id, 'last_name': last_name, 'patient_id': patient_id}) diff --git 
a/tests/test_plan_refsols/highest_priority_per_year.txt b/tests/test_plan_refsols/highest_priority_per_year.txt index accf2dc0f..eb95b9889 100644 --- a/tests/test_plan_refsols/highest_priority_per_year.txt +++ b/tests/test_plan_refsols/highest_priority_per_year.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_year', year_o_orderdate), ('highest_priority', o_orderpriority), ('priority_pct', priority_pct)], orderings=[(year_o_orderdate):asc_first]) - FILTER(condition=RANKING(args=[], partition=[year_o_orderdate], order=[(priority_pct):desc_first]) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'priority_pct': priority_pct, 'year_o_orderdate': year_o_orderdate}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[year_o_orderdate], order=[(priority_pct):desc_first]), columns={'o_orderpriority': o_orderpriority, 'priority_pct': priority_pct, 'year_o_orderdate': year_o_orderdate}) PROJECT(columns={'o_orderpriority': o_orderpriority, 'priority_pct': 100.0:numeric * n_rows / RELSUM(args=[n_rows], partition=[year_o_orderdate], order=[]), 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'n_rows': COUNT()}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority}) diff --git a/tests/test_plan_refsols/lineitem_regional_shipments3.txt b/tests/test_plan_refsols/lineitem_regional_shipments3.txt index 92972470c..d0577731d 100644 --- a/tests/test_plan_refsols/lineitem_regional_shipments3.txt +++ b/tests/test_plan_refsols/lineitem_regional_shipments3.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', r_regionkey), ('name', r_name), ('comment', r_comment)], orderings=[]) - JOIN(condition=t0.n_regionkey == t1.r_regionkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) + JOIN(condition=t0.n_regionkey == 
t1.r_regionkey & t0.r_name == t1.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'r_comment': t1.r_comment, 'r_name': t1.r_name, 'r_regionkey': t1.r_regionkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) JOIN(condition=t0.o_custkey == t1.c_custkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'c_nationkey': t1.c_nationkey, 'r_name': t0.r_name}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'o_custkey': t1.o_custkey, 'r_name': t0.r_name}) diff --git a/tests/test_plan_refsols/many_net_filter_1.txt b/tests/test_plan_refsols/many_net_filter_1.txt index c3287b05e..4ed8bee41 100644 --- a/tests/test_plan_refsols/many_net_filter_1.txt +++ b/tests/test_plan_refsols/many_net_filter_1.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=s_nationkey == 1:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_10.txt b/tests/test_plan_refsols/many_net_filter_10.txt index 8eb35e3e3..b72768f54 100644 --- a/tests/test_plan_refsols/many_net_filter_10.txt +++ b/tests/test_plan_refsols/many_net_filter_10.txt @@ -1,6 +1,6 
@@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_11.txt b/tests/test_plan_refsols/many_net_filter_11.txt index 6122ff62a..b33c63692 100644 --- a/tests/test_plan_refsols/many_net_filter_11.txt +++ b/tests/test_plan_refsols/many_net_filter_11.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': 
t0.s_suppkey}) FILTER(condition=NOT(ISIN(s_nationkey, [0, 3, 6, 9, 12, 15, 18, 21, 24]:array[unknown])), columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_2.txt b/tests/test_plan_refsols/many_net_filter_2.txt index b17aa1d69..d5ab9b6c8 100644 --- a/tests/test_plan_refsols/many_net_filter_2.txt +++ b/tests/test_plan_refsols/many_net_filter_2.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=s_nationkey == 2:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_3.txt b/tests/test_plan_refsols/many_net_filter_3.txt index 0a679d928..79cc6dbe3 100644 --- a/tests/test_plan_refsols/many_net_filter_3.txt +++ b/tests/test_plan_refsols/many_net_filter_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.s_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.s_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=c_nationkey == 3:numeric, columns={'c_custkey': 
c_custkey, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/many_net_filter_4.txt b/tests/test_plan_refsols/many_net_filter_4.txt index c05b4d33a..bc12ee1aa 100644 --- a/tests/test_plan_refsols/many_net_filter_4.txt +++ b/tests/test_plan_refsols/many_net_filter_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) FILTER(condition=s_nationkey == 4:numeric, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_5.txt b/tests/test_plan_refsols/many_net_filter_5.txt index 5c3449d2a..de63f4390 100644 --- a/tests/test_plan_refsols/many_net_filter_5.txt +++ b/tests/test_plan_refsols/many_net_filter_5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_6.txt b/tests/test_plan_refsols/many_net_filter_6.txt index 47ce0b021..50ec38a67 100644 --- a/tests/test_plan_refsols/many_net_filter_6.txt +++ b/tests/test_plan_refsols/many_net_filter_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_7.txt b/tests/test_plan_refsols/many_net_filter_7.txt index fa1027a5b..e4c00810b 100644 --- a/tests/test_plan_refsols/many_net_filter_7.txt +++ b/tests/test_plan_refsols/many_net_filter_7.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) 
AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_8.txt b/tests/test_plan_refsols/many_net_filter_8.txt index 583e93edb..b30bbb026 100644 --- a/tests/test_plan_refsols/many_net_filter_8.txt +++ b/tests/test_plan_refsols/many_net_filter_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, 
columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/many_net_filter_9.txt b/tests/test_plan_refsols/many_net_filter_9.txt index 2f693c013..3f79f3eb9 100644 --- a/tests/test_plan_refsols/many_net_filter_9.txt +++ b/tests/test_plan_refsols/many_net_filter_9.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.n_nationkey == t1.c_nationkey & t1.c_custkey == t0.s_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.n_nationkey == t1.c_nationkey & t0.s_suppkey == t1.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) JOIN(condition=t0.n_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'n_nationkey': t1.n_nationkey, 's_suppkey': t0.s_suppkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'n_regionkey': t1.n_regionkey, 's_suppkey': t0.s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_nationkey': s_nationkey, 's_suppkey': s_suppkey}) diff --git a/tests/test_plan_refsols/month_year_sliding_windows.txt b/tests/test_plan_refsols/month_year_sliding_windows.txt index cac47c047..c9fb8583d 100644 --- a/tests/test_plan_refsols/month_year_sliding_windows.txt +++ b/tests/test_plan_refsols/month_year_sliding_windows.txt @@ -1,7 +1,7 @@ ROOT(columns=[('year', year_o_orderdate), ('month', month_o_orderdate)], orderings=[(year_o_orderdate):asc_first, (month_o_orderdate):asc_first]) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0) & DEFAULT_TO(sum_o_totalprice, 0:numeric) > PREV(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], 
order=[(year_o_orderdate):asc_last, (month_o_orderdate):asc_last], default=0.0), columns={'month_o_orderdate': month_o_orderdate, 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'month_o_orderdate': MONTH(o_orderdate), 'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) - JOIN(condition=t0.year_o_orderdate == YEAR(t1.o_orderdate), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) + JOIN(condition=YEAR(t1.o_orderdate) == t0.year_o_orderdate, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'o_orderdate': t1.o_orderdate, 'o_totalprice': t1.o_totalprice}) FILTER(condition=DEFAULT_TO(sum_o_totalprice, 0:numeric) > next_year_total_spent, columns={'year_o_orderdate': year_o_orderdate}) PROJECT(columns={'next_year_total_spent': NEXT(args=[DEFAULT_TO(sum_o_totalprice, 0:numeric)], partition=[], order=[(year_o_orderdate):asc_last], default=0.0), 'sum_o_totalprice': sum_o_totalprice, 'year_o_orderdate': year_o_orderdate}) AGGREGATE(keys={'year_o_orderdate': YEAR(o_orderdate)}, aggregations={'sum_o_totalprice': SUM(o_totalprice)}) diff --git a/tests/test_plan_refsols/multi_partition_access_2.txt b/tests/test_plan_refsols/multi_partition_access_2.txt index cec6ecabc..a4e5cca4a 100644 --- a/tests/test_plan_refsols/multi_partition_access_2.txt +++ b/tests/test_plan_refsols/multi_partition_access_2.txt @@ -1,7 +1,7 @@ ROOT(columns=[('transaction_id', sbTxId), ('name', sbCustName), ('symbol', sbTickerSymbol), ('transaction_type', sbTxType), ('cus_tick_typ_avg_shares', avg_sbTxShares), ('cust_tick_avg_shares', cust_tick_avg_shares), ('cust_avg_shares', sum_sbTxShares / count_sbTxShares)], orderings=[(sbTxId):asc_first]) JOIN(condition=t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 
'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t0.sbCustName, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTxId': t0.sbTxId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbCustName': t1.sbCustName, 'sbTxId': t0.sbTxId, 'sbTxTickerId': t0.sbTxTickerId, 'sbTxType': t0.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) - JOIN(condition=t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.sum_sbTxShares / t0.count_sbTxShares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) + JOIN(condition=t1.sbTxShares < t0.sum_sbTxShares / t0.count_sbTxShares & t1.sbTxShares < t0.avg_sbTxShares & t1.sbTxShares < t0.cust_tick_avg_shares & t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId & t0.sbTxType == t1.sbTxType, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'avg_sbTxShares': t0.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.cust_tick_avg_shares, 'sbTxCustId': t1.sbTxCustId, 'sbTxId': t1.sbTxId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxTickerId == t1.sbTxTickerId, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'cust_tick_avg_shares': t0.avg_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sbTxType': t1.sbTxType, 'sum_sbTxShares': t0.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'avg_sbTxShares': t1.avg_sbTxShares, 'count_sbTxShares': t0.count_sbTxShares, 'sbTxCustId': t1.sbTxCustId, 'sbTxTickerId': t1.sbTxTickerId, 'sum_sbTxShares': t0.sum_sbTxShares}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'count_sbTxShares': t1.count_sbTxShares, 'sbTxCustId': t0.sbTxCustId, 'sum_sbTxShares': t1.sum_sbTxShares}) diff --git a/tests/test_plan_refsols/multi_partition_access_3.txt b/tests/test_plan_refsols/multi_partition_access_3.txt index 3203eb669..9f8748a78 100644 --- a/tests/test_plan_refsols/multi_partition_access_3.txt +++ b/tests/test_plan_refsols/multi_partition_access_3.txt @@ -7,7 +7,7 @@ ROOT(columns=[('symbol', sbTickerSymbol), ('close', sbDpClose)], orderings=[(sbT JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerType': t0.sbTickerType}) SCAN(table=main.sbTicker, columns={'sbTickerId': sbTickerId, 'sbTickerType': sbTickerType}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) - JOIN(condition=t0.sbDpTickerId == t1.sbDpTickerId & t1.sbDpClose == t0.max_sbDpClose, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) + JOIN(condition=t0.max_sbDpClose == t1.sbDpClose & t0.sbDpTickerId == t1.sbDpTickerId, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbDpClose': t1.sbDpClose, 'sbTickerSymbol': t1.sbTickerSymbol, 'sbTickerType': t1.sbTickerType}) AGGREGATE(keys={'sbDpTickerId': sbDpTickerId}, aggregations={'max_sbDpClose': MAX(sbDpClose)}) SCAN(table=main.sbDailyPrice, columns={'sbDpClose': sbDpClose, 'sbDpTickerId': sbDpTickerId}) JOIN(condition=t0.sbTickerId == t1.sbDpTickerId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'sbDpClose': t1.sbDpClose, 'sbDpTickerId': t1.sbDpTickerId, 'sbTickerSymbol': t0.sbTickerSymbol, 'sbTickerType': t0.sbTickerType}) diff --git a/tests/test_plan_refsols/multi_partition_access_6.txt b/tests/test_plan_refsols/multi_partition_access_6.txt index f7361f00d..26c7135cc 100644 --- a/tests/test_plan_refsols/multi_partition_access_6.txt +++ b/tests/test_plan_refsols/multi_partition_access_6.txt @@ -1,5 +1,5 @@ ROOT(columns=[('transaction_id', sbTxId)], orderings=[(sbTxId):asc_first]) - JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t1.n_rows == 1:numeric | t0.n_rows == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) + JOIN(condition=t0.sbTxCustId == t1.sbTxCustId & t0.sbTxType == t1.sbTxType & t0.n_rows == 1:numeric | t1.n_rows == 1:numeric, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxId': t1.sbTxId}) AGGREGATE(keys={'sbTxCustId': sbTxCustId, 'sbTxType': sbTxType}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.sbTxCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbTxCustId': t1.sbTxCustId, 'sbTxType': t1.sbTxType}) FILTER(condition=n_rows > 1:numeric, columns={'sbTxCustId': sbTxCustId}) diff --git a/tests/test_plan_refsols/n_orders_first_day.txt b/tests/test_plan_refsols/n_orders_first_day.txt index bfe80cdfa..4be21900e 100644 --- 
a/tests/test_plan_refsols/n_orders_first_day.txt +++ b/tests/test_plan_refsols/n_orders_first_day.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n_orders', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[], order=[(o_orderdate):asc_last], allow_ties=True) == 1:numeric, columns={}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[], order=[(o_orderdate):asc_last], allow_ties=True), columns={}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate}) diff --git a/tests/test_plan_refsols/nation_best_order.txt b/tests/test_plan_refsols/nation_best_order.txt index cacdcf7ea..a7d72078e 100644 --- a/tests/test_plan_refsols/nation_best_order.txt +++ b/tests/test_plan_refsols/nation_best_order.txt @@ -4,7 +4,7 @@ ROOT(columns=[('nation_name', n_name), ('customer_name', c_name), ('order_key', SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) FILTER(condition=r_name == 'ASIA':string, columns={'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False) == 1:numeric, columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[c_nationkey], order=[(o_totalprice):desc_first], allow_ties=False), columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': value_percentage}) PROJECT(columns={'c_name': c_name, 'c_nationkey': c_nationkey, 'o_orderkey': o_orderkey, 'o_totalprice': o_totalprice, 'value_percentage': 100.0:numeric * o_totalprice / RELSUM(args=[o_totalprice], partition=[c_nationkey], order=[])}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'c_name': t0.c_name, 'c_nationkey': t0.c_nationkey, 'o_orderkey': t1.o_orderkey, 'o_totalprice': t1.o_totalprice}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/order_info_per_priority.txt b/tests/test_plan_refsols/order_info_per_priority.txt index 9b6412b36..a5bbb4151 100644 --- a/tests/test_plan_refsols/order_info_per_priority.txt +++ b/tests/test_plan_refsols/order_info_per_priority.txt @@ -1,4 +1,4 @@ ROOT(columns=[('order_priority', o_orderpriority), ('order_key', o_orderkey), ('order_total_price', o_totalprice)], orderings=[(o_orderpriority):asc_first]) - FILTER(condition=RANKING(args=[], partition=[o_orderpriority], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_orderpriority], order=[(o_totalprice):desc_first]), columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) FILTER(condition=YEAR(o_orderdate) == 1992:numeric, columns={'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/orders_versus_first_orders.txt b/tests/test_plan_refsols/orders_versus_first_orders.txt index 8ac11284d..1ced0c08b 100644 --- a/tests/test_plan_refsols/orders_versus_first_orders.txt +++ b/tests/test_plan_refsols/orders_versus_first_orders.txt @@ -1,7 +1,7 @@ ROOT(columns=[('customer_name', c_name), ('order_key', o_orderkey), ('days_since_first_order', DATEDIFF('days':string, order_date_8, o_orderdate))], orderings=[(DATEDIFF('days':string, order_date_8, o_orderdate)):desc_last, 
(c_name):asc_first], limit=5:numeric) JOIN(condition=t0.o_custkey == t1.c_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'c_name': t1.c_name, 'o_orderdate': t0.o_orderdate, 'o_orderkey': t0.o_orderkey, 'order_date_8': t1.o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderkey': o_orderkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False), columns={'c_custkey': c_custkey, 'c_name': c_name, 'o_orderdate': o_orderdate}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name, 'o_custkey': t1.o_custkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.c_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'c_custkey': t0.c_custkey, 'c_name': t0.c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/pagerank_a1.txt b/tests/test_plan_refsols/pagerank_a1.txt index 552e10569..e19bcbb88 100644 --- a/tests/test_plan_refsols/pagerank_a1.txt +++ b/tests/test_plan_refsols/pagerank_a1.txt @@ -1,7 +1,7 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_a2.txt b/tests/test_plan_refsols/pagerank_a2.txt index 6abaafaeb..588b922fa 100644 --- a/tests/test_plan_refsols/pagerank_a2.txt +++ b/tests/test_plan_refsols/pagerank_a2.txt @@ -1,11 +1,11 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) 
PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + 
JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_a6.txt b/tests/test_plan_refsols/pagerank_a6.txt index 878b61cc8..50bfeee11 100644 --- a/tests/test_plan_refsols/pagerank_a6.txt +++ b/tests/test_plan_refsols/pagerank_a6.txt @@ -1,27 +1,27 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 
'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 
'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 
'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_b3.txt b/tests/test_plan_refsols/pagerank_b3.txt index 8eb55a421..0204e5725 100644 --- a/tests/test_plan_refsols/pagerank_b3.txt +++ b/tests/test_plan_refsols/pagerank_b3.txt @@ -1,15 +1,15 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * 
RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_c4.txt b/tests/test_plan_refsols/pagerank_c4.txt index 1f5c1470d..7f7421547 100644 --- a/tests/test_plan_refsols/pagerank_c4.txt +++ b/tests/test_plan_refsols/pagerank_c4.txt @@ -1,19 +1,19 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + 
JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & 
PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, 
columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_d5.txt b/tests/test_plan_refsols/pagerank_d5.txt index 8f80ed661..14b9c3b5a 100644 --- a/tests/test_plan_refsols/pagerank_d5.txt +++ b/tests/test_plan_refsols/pagerank_d5.txt @@ -1,23 +1,23 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | 
t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, 
type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / 
DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 
'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/pagerank_h8.txt b/tests/test_plan_refsols/pagerank_h8.txt index ea825a6cc..ee949fd8d 100644 --- a/tests/test_plan_refsols/pagerank_h8.txt +++ b/tests/test_plan_refsols/pagerank_h8.txt @@ -1,35 +1,35 @@ ROOT(columns=[('key', s_key), ('page_rank', ROUND(page_rank, 5:numeric))], orderings=[(s_key):asc_first]) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'page_rank': page_rank, 's_key': s_key}) PROJECT(columns={'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': 
s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': 
t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': 
t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, 
reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t0.l_source, 'l_target': t0.l_target, 'page_rank': t0.page_rank, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'l_source': t1.l_source, 'l_target': t1.l_target, 'page_rank': t0.page_rank, 'sum_n_target': t0.sum_n_target}) FILTER(condition=l_source == l_target & PRESENT(l_target), columns={'anything_n': anything_n, 'page_rank': page_rank, 's_key': s_key, 'sum_n_target': sum_n_target}) PROJECT(columns={'anything_n': anything_n, 'l_source': l_source, 'l_target': l_target, 'page_rank': 0.15:numeric / anything_n + 0.85:numeric * RELSUM(args=[INTEGER(ABSENT(l_target) | l_source != l_target) * anything_page_rank / DEFAULT_TO(sum_n_target, 0:numeric)], partition=[s_key], order=[]), 's_key': s_key, 'sum_n_target': sum_n_target}) - JOIN(condition=ABSENT(t0.l_target) | t1.s_key == t0.l_target, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) + JOIN(condition=ABSENT(t0.l_target) | t0.l_target == t1.s_key, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t0.l_source, 'l_target': t0.l_target, 's_key': t1.s_key, 'sum_n_target': t0.sum_n_target}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, 
cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'anything_n': t0.anything_n, 'anything_page_rank': t0.anything_page_rank, 'l_source': t1.l_source, 'l_target': t1.l_target, 'sum_n_target': t0.sum_n_target}) AGGREGATE(keys={'s_key': s_key}, aggregations={'anything_n': ANYTHING(n), 'anything_page_rank': ANYTHING(page_rank), 'sum_n_target': SUM(IFF(ABSENT(l_target), n, INTEGER(l_source != l_target)))}) JOIN(condition=t0.s_key == t1.l_source, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_source': t1.l_source, 'l_target': t1.l_target, 'n': t0.n, 'page_rank': t0.page_rank, 's_key': t0.s_key}) diff --git a/tests/test_plan_refsols/part_cross_part_a.txt b/tests/test_plan_refsols/part_cross_part_a.txt index 90ec89d6c..e904af361 100644 --- a/tests/test_plan_refsols/part_cross_part_a.txt +++ b/tests/test_plan_refsols/part_cross_part_a.txt @@ -1,12 +1,12 @@ ROOT(columns=[('state', sbCustState), ('exchange', sbTickerExchange), ('n', DEFAULT_TO(sum_n_rows, 0:numeric))], orderings=[(sbCustState):asc_first, (sbTickerExchange):asc_first]) AGGREGATE(keys={'sbCustState': sbCustState, 'sbTickerExchange': sbTickerExchange}, aggregations={'sum_n_rows': SUM(n_rows)}) - JOIN(condition=t0.sbTickerExchange == t1.sbTickerExchange & t0.sbCustId == t1.sbCustId, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbCustId == t1.sbCustId & t0.sbTickerExchange == t1.sbTickerExchange, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbCustState': t1.sbCustState, 'sbTickerExchange': t0.sbTickerExchange}) 
AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) SCAN(table=main.sbTicker, columns={'sbTickerExchange': sbTickerExchange}) SCAN(table=main.sbCustomer, columns={'sbCustId': sbCustId, 'sbCustState': sbCustState}) AGGREGATE(keys={'sbCustId': sbCustId, 'sbTickerExchange': sbTickerExchange}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxTickerId == t1.sbTickerId & t1.sbTickerExchange == t0.sbTickerExchange, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) + JOIN(condition=t0.sbTickerExchange == t1.sbTickerExchange & t0.sbTxTickerId == t1.sbTickerId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) JOIN(condition=t0.sbCustId == t1.sbTxCustId, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'sbCustId': t0.sbCustId, 'sbTickerExchange': t0.sbTickerExchange, 'sbTxTickerId': t1.sbTxTickerId}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'sbCustId': t1.sbCustId, 'sbTickerExchange': t0.sbTickerExchange}) AGGREGATE(keys={'sbTickerExchange': sbTickerExchange}, aggregations={}) diff --git a/tests/test_plan_refsols/part_cross_part_b.txt b/tests/test_plan_refsols/part_cross_part_b.txt index 02f07c848..5a8304b17 100644 --- a/tests/test_plan_refsols/part_cross_part_b.txt +++ b/tests/test_plan_refsols/part_cross_part_b.txt @@ -1,5 +1,5 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', month), ('n', RELSUM(args=[DEFAULT_TO(n_rows, 0:numeric)], partition=[sbCustState], order=[(month):asc_last], cumulative=True))], orderings=[(sbCustState):asc_first, (month):asc_first]) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': 
t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == t1.month & t0.sbCustState == t1.sbCustState, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'month': t0.month, 'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) @@ -7,8 +7,8 @@ ROOT(columns=[('state', sbCustState), ('month_of_year', month), ('n', RELSUM(arg FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=DATETIME(t1.sbTxDateTime, 'start of month':string) == t0.month, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'month': t1.month, 
'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/part_cross_part_c.txt b/tests/test_plan_refsols/part_cross_part_c.txt index 04136287d..fea731d0e 100644 --- a/tests/test_plan_refsols/part_cross_part_c.txt +++ b/tests/test_plan_refsols/part_cross_part_c.txt @@ -1,6 +1,6 @@ ROOT(columns=[('state', sbCustState), ('max_n', max_n)], orderings=[]) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={'max_n': MAX(DEFAULT_TO(n_rows, 0:numeric))}) - JOIN(condition=t0.sbCustState == t1.sbCustState & t0.month == t1.month, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) + JOIN(condition=t0.month == t1.month & t0.sbCustState == t1.sbCustState, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'sbCustState': t0.sbCustState}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) @@ -8,8 +8,8 @@ ROOT(columns=[('state', sbCustState), ('max_n', max_n)], orderings=[]) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) AGGREGATE(keys={'month': month, 'sbCustState': sbCustState}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.sbTxCustId == t1.sbCustId & t1.sbCustState == t0.sbCustState, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) - JOIN(condition=t0.month == DATETIME(t1.sbTxDateTime, 'start of month':string), type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) + JOIN(condition=t0.sbCustState == t1.sbCustState & t0.sbTxCustId == t1.sbCustId, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState}) + JOIN(condition=DATETIME(t1.sbTxDateTime, 'start of month':string) == t0.month, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'month': t0.month, 'sbCustState': t0.sbCustState, 'sbTxCustId': t1.sbTxCustId}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'month': t1.month, 'sbCustState': t0.sbCustState}) AGGREGATE(keys={'sbCustState': sbCustState}, aggregations={}) SCAN(table=main.sbCustomer, columns={'sbCustState': sbCustState}) diff --git a/tests/test_plan_refsols/percentile_customers_per_region.txt b/tests/test_plan_refsols/percentile_customers_per_region.txt index 627b425c7..19bd8d4b3 100644 --- a/tests/test_plan_refsols/percentile_customers_per_region.txt +++ b/tests/test_plan_refsols/percentile_customers_per_region.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', c_name)], orderings=[(c_name):asc_first]) - FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) == 95:numeric & ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) + FILTER(condition=95:numeric == PERCENTILE(args=[], partition=[n_regionkey], order=[(c_acctbal):asc_last]) & ENDSWITH(c_phone, '00':string), columns={'c_name': c_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'c_phone': t1.c_phone, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_name': 
c_name, 'c_nationkey': c_nationkey, 'c_phone': c_phone}) diff --git a/tests/test_plan_refsols/quarter_cum_ir_analysis.txt b/tests/test_plan_refsols/quarter_cum_ir_analysis.txt index 8e39cad16..bc47c63b6 100644 --- a/tests/test_plan_refsols/quarter_cum_ir_analysis.txt +++ b/tests/test_plan_refsols/quarter_cum_ir_analysis.txt @@ -7,7 +7,7 @@ ROOT(columns=[('quarter', quarter), ('n_incidents', DEFAULT_TO(ndistinct_in_devi SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t1.ca_dt < DATETIME(t0.pr_release, '+2 years':string, 'start of quarter':string) & t1.ca_dt >= t0.pr_release, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t1.ca_dt}) FILTER(condition=pr_name == 'RubyCopper-Star':string, columns={'pr_release': pr_release}) SCAN(table=main.PRODUCTS, columns={'pr_name': pr_name, 'pr_release': pr_release}) @@ -16,8 +16,8 @@ ROOT(columns=[('quarter', quarter), ('n_incidents', DEFAULT_TO(ndistinct_in_devi SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'quarter': quarter}, aggregations={'ndistinct_in_device_id': NDISTINCT(in_device_id)}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'in_device_id': t0.in_device_id, 'quarter': t0.quarter}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string) & t1.in_repair_country_id == 
t0.co_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'in_device_id': t1.in_device_id, 'quarter': t0.quarter}) - JOIN(condition=t0.quarter == DATETIME(t1.ca_dt, 'start of quarter':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t1.ca_dt, 'co_id': t0.co_id, 'quarter': t0.quarter}) + JOIN(condition=DATETIME(t1.in_error_report_ts, 'start of day':string) == t0.ca_dt & t0.co_id == t1.in_repair_country_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'in_device_id': t1.in_device_id, 'quarter': t0.quarter}) + JOIN(condition=DATETIME(t1.ca_dt, 'start of quarter':string) == t0.quarter, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t1.ca_dt, 'co_id': t0.co_id, 'quarter': t0.quarter}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'co_id': t0.co_id, 'quarter': t1.quarter}) JOIN(condition=True:bool, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'co_id': t1.co_id}) FILTER(condition=pr_name == 'RubyCopper-Star':string, columns={}) diff --git a/tests/test_plan_refsols/rank_with_filters_c.txt b/tests/test_plan_refsols/rank_with_filters_c.txt index 57adb4a27..a67b07ac6 100644 --- a/tests/test_plan_refsols/rank_with_filters_c.txt +++ b/tests/test_plan_refsols/rank_with_filters_c.txt @@ -1,5 +1,5 @@ ROOT(columns=[('pname', p_name), ('psize', size)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first, (p_partkey):asc_last]) == 1:numeric, columns={'p_name': p_name, 'size': size_1}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[p_size], order=[(p_retailprice):desc_first, (p_partkey):asc_last]), columns={'p_name': p_name, 'size': size_1}) JOIN(condition=t0.p_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_partkey': t1.p_partkey, 'p_retailprice': t1.p_retailprice, 'p_size': t0.p_size, 'size_1': t1.p_size}) LIMIT(limit=5:numeric, columns={'p_size': p_size}, orderings=[(p_size):desc_last]) AGGREGATE(keys={'p_size': p_size}, aggregations={}) diff --git a/tests/test_plan_refsols/region_orders_from_nations_richest.txt b/tests/test_plan_refsols/region_orders_from_nations_richest.txt index 7374e5606..300174523 100644 --- a/tests/test_plan_refsols/region_orders_from_nations_richest.txt +++ b/tests/test_plan_refsols/region_orders_from_nations_richest.txt @@ -3,7 +3,7 @@ ROOT(columns=[('region_name', r_name), ('n_orders', DEFAULT_TO(sum_n_rows, 0:num SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'n_regionkey': n_regionkey}, aggregations={'sum_n_rows': SUM(n_rows)}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows}) - FILTER(condition=RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[c_nationkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False), columns={'c_custkey': c_custkey, 'n_regionkey': n_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'c_name': t1.c_name, 'c_nationkey': t1.c_nationkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) diff --git 
a/tests/test_plan_refsols/regional_first_order_best_line_part.txt b/tests/test_plan_refsols/regional_first_order_best_line_part.txt index f3af15301..27c008b27 100644 --- a/tests/test_plan_refsols/regional_first_order_best_line_part.txt +++ b/tests/test_plan_refsols/regional_first_order_best_line_part.txt @@ -2,9 +2,9 @@ ROOT(columns=[('region_name', r_name), ('part_name', p_name)], orderings=[(r_nam JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'p_name': t1.p_name, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'n_regionkey': t0.n_regionkey, 'p_name': t1.p_name}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False) == 1:numeric, columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(l_quantity):desc_first, (l_linenumber):asc_last], allow_ties=False), columns={'l_partkey': l_partkey, 'n_regionkey': n_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'n_regionkey': t0.n_regionkey}) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False) == 1:numeric, columns={'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(o_orderdate):asc_last, (o_orderkey):asc_last], allow_ties=False), columns={'n_regionkey': n_regionkey, 'o_orderkey': o_orderkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'o_orderdate': t1.o_orderdate, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/regional_suppliers_percentile.txt b/tests/test_plan_refsols/regional_suppliers_percentile.txt index 6a369877b..5db978fd2 100644 --- a/tests/test_plan_refsols/regional_suppliers_percentile.txt +++ b/tests/test_plan_refsols/regional_suppliers_percentile.txt @@ -1,5 +1,5 @@ ROOT(columns=[('name', s_name)], orderings=[]) - FILTER(condition=PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000) == 1000:numeric, columns={'s_name': s_name}) + FILTER(condition=1000:numeric == PERCENTILE(args=[], partition=[n_regionkey], order=[(n_rows):asc_last, (s_name):asc_last], n_buckets=1000), columns={'s_name': s_name}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 'n_rows': t1.n_rows, 's_name': t0.s_name}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_regionkey': t0.n_regionkey, 's_name': t1.s_name, 's_suppkey': t1.s_suppkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/retail_members_agg_best_raw.txt b/tests/test_plan_refsols/retail_members_agg_best_raw.txt index 156d514d4..9a4851379 100644 --- a/tests/test_plan_refsols/retail_members_agg_best_raw.txt +++ b/tests/test_plan_refsols/retail_members_agg_best_raw.txt @@ -1,5 +1,5 @@ 
ROOT(columns=[('store_location', UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))), ('total_amount', total_amount), ('name', JOIN_STRINGS(' ':string, UNMASK::(PTY_UNPROTECT([first_name], 'deName')), UNMASK::(PTY_UNPROTECT_NAME([last_name]))))], orderings=[(total_amount):desc_last], limit=5:numeric) - FILTER(condition=RANKING(args=[], partition=[UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))], order=[(total_amount):desc_first], allow_ties=False) == 1:numeric, columns={'first_name': first_name, 'last_name': last_name, 'store_location': store_location, 'total_amount': total_amount}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))], order=[(total_amount):desc_first], allow_ties=False), columns={'first_name': first_name, 'last_name': last_name, 'store_location': store_location, 'total_amount': total_amount}) JOIN(condition=t0.customer_id == t1.customer_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'first_name': t1.first_name, 'last_name': t1.last_name, 'store_location': t0.store_location, 'total_amount': t0.total_amount}) SCAN(table=bodo.retail.transactions, columns={'customer_id': customer_id, 'store_location': store_location, 'total_amount': total_amount}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'customer_id': customer_id, 'first_name': first_name, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_members_agg_best_rewrite.txt b/tests/test_plan_refsols/retail_members_agg_best_rewrite.txt index 156d514d4..9a4851379 100644 --- a/tests/test_plan_refsols/retail_members_agg_best_rewrite.txt +++ b/tests/test_plan_refsols/retail_members_agg_best_rewrite.txt @@ -1,5 +1,5 @@ ROOT(columns=[('store_location', UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))), ('total_amount', total_amount), ('name', JOIN_STRINGS(' ':string, UNMASK::(PTY_UNPROTECT([first_name], 'deName')), UNMASK::(PTY_UNPROTECT_NAME([last_name]))))], 
orderings=[(total_amount):desc_last], limit=5:numeric) - FILTER(condition=RANKING(args=[], partition=[UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))], order=[(total_amount):desc_first], allow_ties=False) == 1:numeric, columns={'first_name': first_name, 'last_name': last_name, 'store_location': store_location, 'total_amount': total_amount}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[UNMASK::(PTY_UNPROTECT_ADDRESS([store_location]))], order=[(total_amount):desc_first], allow_ties=False), columns={'first_name': first_name, 'last_name': last_name, 'store_location': store_location, 'total_amount': total_amount}) JOIN(condition=t0.customer_id == t1.customer_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'first_name': t1.first_name, 'last_name': t1.last_name, 'store_location': t0.store_location, 'total_amount': t0.total_amount}) SCAN(table=bodo.retail.transactions, columns={'customer_id': customer_id, 'store_location': store_location, 'total_amount': total_amount}) SCAN(table=bodo.retail.protected_loyalty_members, columns={'customer_id': customer_id, 'first_name': first_name, 'last_name': last_name}) diff --git a/tests/test_plan_refsols/retail_transactions_ts_raw.txt b/tests/test_plan_refsols/retail_transactions_ts_raw.txt index 84d1b5d73..59fe9c58f 100644 --- a/tests/test_plan_refsols/retail_transactions_ts_raw.txt +++ b/tests/test_plan_refsols/retail_transactions_ts_raw.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_10), ('n3', sum_expr_11), ('n4', sum_expr_12), ('n5', sum_expr_13)], orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_10': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_11': 
SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_12': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_13': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric)}) - FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric, columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, aggregations={'sum_expr': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_10': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric), 'sum_expr_11': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & 
HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_12': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric), 'sum_expr_13': SUM(DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric & MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))))}) + FILTER(condition=DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 1:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 7:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 2:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == 8:numeric | DAY(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 4:numeric & HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric | HOUR(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) < 3:numeric & MINUTE(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))) == SECOND(UNMASK::(PTY_UNPROTECT_TS([transaction_date]))), columns={'transaction_date': transaction_date}) SCAN(table=bodo.retail.transactions, columns={'transaction_date': transaction_date}) diff --git a/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt index 0a01ae0a8..408cbafcc 100644 --- a/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt +++ b/tests/test_plan_refsols/retail_transactions_ts_rewrite.txt @@ -1,4 +1,4 @@ ROOT(columns=[('n1', sum_expr), ('n2', sum_expr_10), ('n3', sum_expr_11), ('n4', sum_expr_12), ('n5', sum_expr_13)], 
orderings=[]) - AGGREGATE(keys={}, aggregations={'sum_expr': SUM(ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown])), 'sum_expr_10': SUM(ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown])), 'sum_expr_11': SUM(ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown])), 'sum_expr_12': SUM(ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown])), 'sum_expr_13': SUM(ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 
01:32:32']:array[unknown]))}) - FILTER(condition=ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown]) | ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown]) | ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown]) | ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown]) | ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]), columns={'transaction_date': transaction_date}) + AGGREGATE(keys={}, 
aggregations={'sum_expr': SUM(ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown])), 'sum_expr_10': SUM(ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown])), 'sum_expr_11': SUM(ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown])), 'sum_expr_12': SUM(ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown])), 'sum_expr_13': SUM(ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]) | ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]))}) + 
FILTER(condition=ISIN(transaction_date, ['1752-07-20 01:18:18', '1880-04-06 00:47:47', '2956-09-24 00:03:03', '1868-06-13 01:22:22', '0780-03-23 01:14:14', '1598-04-24 01:11:11', '0763-04-15 00:16:16', '2780-03-19 01:32:32']:array[unknown]) | ISIN(transaction_date, ['2268-07-06 01:50:11', '3056-08-07 01:18:26', '3120-07-22 02:43:20', '1010-12-08 01:47:15', '1440-10-15 02:26:30', '3054-12-02 00:51:55', '3031-02-17 00:54:21', '1539-02-23 00:49:34', '2418-09-09 01:12:48', '2418-09-09 02:09:31', '2551-01-12 00:34:57', '3141-01-25 02:24:01', '2328-01-19 01:20:40', '1577-03-20 00:27:19', '1608-08-20 00:10:55', '1608-08-20 01:12:55', '1608-08-20 02:14:47', '2825-09-23 02:31:19', '1286-12-21 00:21:24', '1286-12-21 01:25:46', '3300-07-12 00:15:35', '2059-07-23 01:56:15', '2955-06-27 00:48:34', '2955-06-27 01:24:43', '0937-05-21 00:40:43', '0930-11-28 02:44:19', '1605-10-12 00:58:57', '0781-08-29 02:28:10', '2374-09-21 00:21:42', '2374-09-21 02:10:55', '3022-05-13 01:56:21', '3088-03-30 01:09:15', '3088-03-30 02:38:56', '1757-01-16 00:20:29', '3287-10-20 01:17:31', '2555-09-08 00:40:20', '2555-09-08 01:20:22', '2555-09-08 02:36:58', '2176-01-07 02:50:08', '2282-06-16 00:21:35', '2595-05-23 01:32:01', '3237-05-26 01:19:24', '3237-05-26 01:52:49', '2780-03-19 01:32:32', '2780-03-19 02:33:01']:array[unknown]) | ISIN(transaction_date, ['1010-12-08 07:23:35', '2328-01-19 07:33:25', '1577-03-20 07:41:29', '1345-03-06 07:41:47', '0937-05-21 07:27:48', '2176-01-07 07:07:03']:array[unknown]) | ISIN(transaction_date, ['2052-11-18 08:24:33', '2052-11-18 08:32:00', '1577-03-20 08:03:51', '1577-03-20 08:32:17', '2550-01-17 08:56:44', '1551-03-04 08:36:08']:array[unknown]) | ISIN(transaction_date, ['3120-07-22 08:30:44', '1890-02-18 08:21:13', '1890-02-18 08:46:51']:array[unknown]) | ISIN(transaction_date, ['2178-03-20 07:19:29', '2825-09-23 07:37:08']:array[unknown]), columns={'transaction_date': transaction_date}) SCAN(table=bodo.retail.transactions, columns={'transaction_date': 
transaction_date}) diff --git a/tests/test_plan_refsols/richest_customer_key_per_region.txt b/tests/test_plan_refsols/richest_customer_key_per_region.txt index 67636a941..6295785f3 100644 --- a/tests/test_plan_refsols/richest_customer_key_per_region.txt +++ b/tests/test_plan_refsols/richest_customer_key_per_region.txt @@ -1,5 +1,5 @@ ROOT(columns=[('key', c_custkey)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=False) == 1:numeric, columns={'c_custkey': c_custkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first], allow_ties=False), columns={'c_custkey': c_custkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_custkey': t1.c_custkey, 'n_regionkey': t0.n_regionkey}) SCAN(table=tpch.NATION, columns={'n_nationkey': n_nationkey, 'n_regionkey': n_regionkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/richest_customer_per_region.txt b/tests/test_plan_refsols/richest_customer_per_region.txt index 426377814..c69dee905 100644 --- a/tests/test_plan_refsols/richest_customer_per_region.txt +++ b/tests/test_plan_refsols/richest_customer_per_region.txt @@ -1,5 +1,5 @@ ROOT(columns=[('region_name', r_name), ('nation_name', n_name), ('customer_name', c_name), ('balance', c_acctbal)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False) == 1:numeric, columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'n_name': n_name, 'r_name': r_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[n_regionkey], order=[(c_acctbal):desc_first, (c_name):asc_last], allow_ties=False), columns={'c_acctbal': c_acctbal, 'c_name': c_name, 'n_name': 
n_name, 'r_name': r_name}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'c_acctbal': t1.c_acctbal, 'c_name': t1.c_name, 'n_name': t0.n_name, 'n_regionkey': t0.n_regionkey, 'r_name': t0.r_name}) JOIN(condition=t0.r_regionkey == t1.n_regionkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_name': t1.n_name, 'n_nationkey': t1.n_nationkey, 'n_regionkey': t1.n_regionkey, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_10.txt b/tests/test_plan_refsols/simple_cross_10.txt index bdf4299bd..e287d253b 100644 --- a/tests/test_plan_refsols/simple_cross_10.txt +++ b/tests/test_plan_refsols/simple_cross_10.txt @@ -3,7 +3,7 @@ ROOT(columns=[('region_name', r_name), ('n_other_nations', DEFAULT_TO(n_rows, 0: SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) JOIN(condition=SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.n_name, None:unknown, 1:numeric, None:unknown) & t0.key_2 == t1.n_regionkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) - JOIN(condition=t1.r_name != t0.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_name != t1.r_name, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'key_2': t1.r_regionkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 
'n_regionkey': n_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_3.txt b/tests/test_plan_refsols/simple_cross_3.txt index d4688a855..ba7783649 100644 --- a/tests/test_plan_refsols/simple_cross_3.txt +++ b/tests/test_plan_refsols/simple_cross_3.txt @@ -1,6 +1,6 @@ ROOT(columns=[('supplier_nation', anything_n_name), ('customer_nation', anything_customer_nation), ('nation_combinations', n_rows)], orderings=[]) AGGREGATE(keys={'key_5': key_5, 'key_8': key_8, 'n_nationkey': n_nationkey, 'r_regionkey': r_regionkey}, aggregations={'anything_customer_nation': ANYTHING(customer_nation), 'anything_n_name': ANYTHING(n_name), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.s_nationkey == t0.n_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'customer_nation': t0.customer_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'customer_nation': t0.customer_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customer_nation': t0.customer_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customer_nation': t0.customer_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.key_8 == t1.c_nationkey, 
type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'customer_nation': t0.customer_nation, 'key_5': t0.key_5, 'key_8': t0.key_8, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/simple_cross_4.txt b/tests/test_plan_refsols/simple_cross_4.txt index 59617616d..d37ef1bb2 100644 --- a/tests/test_plan_refsols/simple_cross_4.txt +++ b/tests/test_plan_refsols/simple_cross_4.txt @@ -2,6 +2,6 @@ ROOT(columns=[('region_name', r_name), ('n_other_regions', DEFAULT_TO(n_rows, 0: JOIN(condition=t0.r_regionkey == t1.r_regionkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'n_rows': t1.n_rows, 'r_name': t0.r_name}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) AGGREGATE(keys={'r_regionkey': r_regionkey}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.r_name != t0.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.r_name != t1.r_name & SLICE(t0.r_name, None:unknown, 1:numeric, None:unknown) == SLICE(t1.r_name, None:unknown, 1:numeric, None:unknown), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'r_regionkey': t0.r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name, 'r_regionkey': r_regionkey}) SCAN(table=tpch.REGION, columns={'r_name': r_name}) diff --git a/tests/test_plan_refsols/simple_cross_5.txt b/tests/test_plan_refsols/simple_cross_5.txt index 471fa206e..6b805c7bb 100644 --- a/tests/test_plan_refsols/simple_cross_5.txt +++ b/tests/test_plan_refsols/simple_cross_5.txt @@ -5,9 +5,9 @@ ROOT(columns=[('part_size', p_size), ('best_order_priority', o_orderpriority), ( FILTER(condition=STARTSWITH(p_container, 
'LG':string), columns={'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_size': p_size}) PROJECT(columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'total_qty': KEEP_IF(DEFAULT_TO(sum_l_quantity, 0:numeric), DEFAULT_TO(sum_l_quantity, 0:numeric) > 0:numeric)}) - FILTER(condition=RANKING(args=[], partition=[p_size], order=[(KEEP_IF(DEFAULT_TO(sum_l_quantity, 0:numeric), DEFAULT_TO(sum_l_quantity, 0:numeric) > 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'sum_l_quantity': sum_l_quantity}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[p_size], order=[(KEEP_IF(DEFAULT_TO(sum_l_quantity, 0:numeric), DEFAULT_TO(sum_l_quantity, 0:numeric) > 0:numeric)):desc_first], allow_ties=False), columns={'o_orderpriority': o_orderpriority, 'p_size': p_size, 'sum_l_quantity': sum_l_quantity}) AGGREGATE(keys={'o_orderpriority': o_orderpriority, 'p_size': p_size}, aggregations={'sum_l_quantity': SUM(l_quantity)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_size == t0.p_size, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) + JOIN(condition=t0.l_partkey == t1.p_partkey & t0.p_size == t1.p_size, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_quantity': t0.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_partkey': t1.l_partkey, 'l_quantity': t1.l_quantity, 'o_orderpriority': t0.o_orderpriority, 'p_size': t0.p_size}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'o_orderkey': t1.o_orderkey, 'o_orderpriority': t1.o_orderpriority, 'p_size': t0.p_size}) 
LIMIT(limit=10:numeric, columns={'p_size': p_size}, orderings=[(p_size):asc_first]) diff --git a/tests/test_plan_refsols/simple_cross_6.txt b/tests/test_plan_refsols/simple_cross_6.txt index 1dbd71cd8..a0f9fead8 100644 --- a/tests/test_plan_refsols/simple_cross_6.txt +++ b/tests/test_plan_refsols/simple_cross_6.txt @@ -1,6 +1,6 @@ ROOT(columns=[('n_pairs', n_rows)], orderings=[]) AGGREGATE(keys={}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t1.c_mktsegment == t0.c_mktsegment & t1.c_nationkey == t0.c_nationkey & t1.c_custkey > t0.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) + JOIN(condition=t0.c_mktsegment == t1.c_mktsegment & t0.c_nationkey == t1.c_nationkey & t1.c_custkey > t0.c_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={}) FILTER(condition=c_acctbal > 9990:numeric, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) SCAN(table=tpch.CUSTOMER, columns={'c_acctbal': c_acctbal, 'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) FILTER(condition=c_acctbal > 9990:numeric, columns={'c_custkey': c_custkey, 'c_mktsegment': c_mktsegment, 'c_nationkey': c_nationkey}) diff --git a/tests/test_plan_refsols/simple_cross_8.txt b/tests/test_plan_refsols/simple_cross_8.txt index a2fe5605d..5760da5cc 100644 --- a/tests/test_plan_refsols/simple_cross_8.txt +++ b/tests/test_plan_refsols/simple_cross_8.txt @@ -1,6 +1,6 @@ ROOT(columns=[('supplier_region', anything_r_name), ('customer_region', anything_customer_region), ('region_combinations', n_rows)], orderings=[]) AGGREGATE(keys={'key_2': key_2, 'r_regionkey': r_regionkey}, aggregations={'anything_customer_region': ANYTHING(customer_region), 'anything_r_name': ANYTHING(r_name), 'n_rows': COUNT()}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.r_name == t0.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, 
columns={'customer_region': t0.customer_region, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.r_name == t1.r_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'customer_region': t0.customer_region, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customer_region': t0.customer_region, 'key_2': t0.key_2, 'l_suppkey': t1.l_suppkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'customer_region': t0.customer_region, 'key_2': t0.key_2, 'o_orderkey': t1.o_orderkey, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'customer_region': t0.customer_region, 'key_2': t0.key_2, 'r_name': t0.r_name, 'r_regionkey': t0.r_regionkey}) diff --git a/tests/test_plan_refsols/simplification_4.txt b/tests/test_plan_refsols/simplification_4.txt index 63e906927..3fe9c66cc 100644 --- a/tests/test_plan_refsols/simplification_4.txt +++ b/tests/test_plan_refsols/simplification_4.txt @@ -1,4 +1,4 @@ ROOT(columns=[('date_time', sbTxDateTime), ('s00', DATETIME(sbTxDateTime, 'start of week':string, '-8 weeks':string)), ('s01', False:bool), ('s02', ISIN(MONTH(sbTxDateTime), [1, 2, 3]:array[numeric])), ('s03', ISIN(MONTH(sbTxDateTime), [4, 5, 6]:array[numeric])), ('s04', ISIN(MONTH(sbTxDateTime), [7, 8, 9]:array[numeric])), ('s05', ISIN(MONTH(sbTxDateTime), [10, 11, 12]:array[numeric])), ('s06', False:bool), ('s07', False:bool), ('s08', MONTH(sbTxDateTime) < 4:numeric), ('s09', MONTH(sbTxDateTime) < 7:numeric), ('s10', 
MONTH(sbTxDateTime) < 10:numeric), ('s11', True:bool), ('s12', False:bool), ('s13', MONTH(sbTxDateTime) <= 3:numeric), ('s14', MONTH(sbTxDateTime) <= 6:numeric), ('s15', MONTH(sbTxDateTime) <= 9:numeric), ('s16', True:bool), ('s17', True:bool), ('s18', MONTH(sbTxDateTime) > 3:numeric), ('s19', MONTH(sbTxDateTime) > 6:numeric), ('s20', MONTH(sbTxDateTime) > 9:numeric), ('s21', False:bool), ('s22', True:bool), ('s23', MONTH(sbTxDateTime) >= 4:numeric), ('s24', MONTH(sbTxDateTime) >= 7:numeric), ('s25', MONTH(sbTxDateTime) >= 10:numeric), ('s26', False:bool), ('s27', True:bool), ('s28', NOT(ISIN(MONTH(sbTxDateTime), [1, 2, 3]:array[numeric]))), ('s29', NOT(ISIN(MONTH(sbTxDateTime), [4, 5, 6]:array[numeric]))), ('s30', NOT(ISIN(MONTH(sbTxDateTime), [7, 8, 9]:array[numeric]))), ('s31', NOT(ISIN(MONTH(sbTxDateTime), [10, 11, 12]:array[numeric]))), ('s32', True:bool), ('s33', 2024:numeric), ('s34', 3:numeric), ('s35', 8:numeric), ('s36', 13:numeric), ('s37', 12:numeric), ('s38', 45:numeric), ('s39', 59:numeric), ('s40', 2020:numeric), ('s41', 1:numeric), ('s42', 1:numeric), ('s43', 31:numeric), ('s44', 0:numeric), ('s45', 0:numeric), ('s46', 0:numeric), ('s47', 2023:numeric), ('s48', 3:numeric), ('s49', 7:numeric), ('s50', 4:numeric), ('s51', 6:numeric), ('s52', 55:numeric), ('s53', 0:numeric), ('s54', 1999:numeric), ('s55', 4:numeric), ('s56', 12:numeric), ('s57', 31:numeric), ('s58', 23:numeric), ('s59', 59:numeric), ('s60', 58:numeric), ('s61', False:bool), ('s62', False:bool), ('s63', False:bool), ('s64', True:bool), ('s65', True:bool), ('s66', True:bool), ('s67', False:bool), ('s68', False:bool), ('s69', False:bool), ('s70', True:bool), ('s71', True:bool), ('s72', True:bool), ('s73', False:bool), ('s74', False:bool), ('s75', True:bool), ('s76', True:bool), ('s77', False:bool), ('s78', True:bool), ('s79', False:bool), ('s80', True:bool), ('s81', True:bool), ('s82', False:bool), ('s83', True:bool), ('s84', False:bool), ('s85', False:bool), ('s86', True:bool), ('s87', 
False:bool), ('s88', False:bool), ('s89', True:bool), ('s90', True:bool), ('s91', False:bool), ('s92', True:bool), ('s93', False:bool), ('s94', False:bool), ('s95', True:bool), ('s96', True:bool), ('s97', datetime.date(2025, 2, 28):datetime), ('s98', datetime.date(2024, 12, 30):datetime), ('s99', datetime.date(2024, 12, 30):datetime), ('s100', datetime.date(2024, 12, 30):datetime), ('s101', datetime.date(2024, 12, 30):datetime), ('s102', datetime.date(2024, 12, 30):datetime), ('s103', datetime.date(2025, 1, 6):datetime), ('s104', datetime.date(2025, 1, 6):datetime)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[], order=[(sbTxDateTime):asc_last]) == 1:numeric | RANKING(args=[], partition=[], order=[(sbTxDateTime):desc_first]) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[], order=[(sbTxDateTime):desc_first]) | 1:numeric == RANKING(args=[], partition=[], order=[(sbTxDateTime):asc_last]), columns={'sbTxDateTime': sbTxDateTime}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime}) SCAN(table=main.sbTransaction, columns={'sbTxDateTime': sbTxDateTime}) diff --git a/tests/test_plan_refsols/singular3.txt b/tests/test_plan_refsols/singular3.txt index bac588a67..8a4767831 100644 --- a/tests/test_plan_refsols/singular3.txt +++ b/tests/test_plan_refsols/singular3.txt @@ -2,5 +2,5 @@ ROOT(columns=[('name', c_name)], orderings=[(o_orderdate):asc_last]) JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) LIMIT(limit=5:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}, orderings=[(c_name):asc_first]) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 
'o_orderdate': o_orderdate}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular4.txt b/tests/test_plan_refsols/singular4.txt index 82339695c..28eab0f99 100644 --- a/tests/test_plan_refsols/singular4.txt +++ b/tests/test_plan_refsols/singular4.txt @@ -2,6 +2,6 @@ ROOT(columns=[('name', c_name)], orderings=[(DEFAULT_TO(o_orderdate, datetime.da JOIN(condition=t0.c_custkey == t1.o_custkey, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_name': t0.c_name, 'o_orderdate': t1.o_orderdate}) FILTER(condition=c_nationkey == 6:numeric, columns={'c_custkey': c_custkey, 'c_name': c_name}) SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]) == 1:numeric, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(o_totalprice):desc_first]), columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate}) FILTER(condition=o_orderpriority == '1-URGENT':string, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_totalprice': o_totalprice}) SCAN(table=tpch.ORDERS, columns={'o_custkey': o_custkey, 'o_orderdate': o_orderdate, 'o_orderpriority': o_orderpriority, 'o_totalprice': o_totalprice}) diff --git a/tests/test_plan_refsols/singular5.txt b/tests/test_plan_refsols/singular5.txt index 5c0c74ea9..b19354c9b 100644 --- a/tests/test_plan_refsols/singular5.txt +++ b/tests/test_plan_refsols/singular5.txt @@ -5,7 +5,7 @@ ROOT(columns=[('container', p_container), ('highest_price_ship_date', max_anythi FILTER(condition=p_brand == 
'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) AGGREGATE(keys={'p_partkey': p_partkey}, aggregations={'anything_l_shipdate': ANYTHING(l_shipdate), 'n_rows': COUNT()}) - FILTER(condition=RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]) == 1:numeric, columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[p_container], order=[(l_extendedprice):desc_first, (l_shipdate):asc_last]), columns={'l_shipdate': l_shipdate, 'p_partkey': p_partkey}) JOIN(condition=t0.p_partkey == t1.l_partkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_extendedprice': t1.l_extendedprice, 'l_shipdate': t1.l_shipdate, 'p_container': t0.p_container, 'p_partkey': t0.p_partkey}) FILTER(condition=p_brand == 'Brand#13':string, columns={'p_container': p_container, 'p_partkey': p_partkey}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/singular6.txt b/tests/test_plan_refsols/singular6.txt index 3e9681540..fc9799117 100644 --- a/tests/test_plan_refsols/singular6.txt +++ b/tests/test_plan_refsols/singular6.txt @@ -4,7 +4,7 @@ ROOT(columns=[('name', c_name), ('receipt_date', l_receiptdate), ('nation_name', SCAN(table=tpch.CUSTOMER, columns={'c_custkey': c_custkey, 'c_name': c_name, 'c_nationkey': c_nationkey}) JOIN(condition=t0.s_nationkey == t1.n_nationkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'n_name': t1.n_name, 'o_custkey': t0.o_custkey}) JOIN(condition=t0.l_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_receiptdate': t0.l_receiptdate, 'o_custkey': t0.o_custkey, 
's_nationkey': t1.s_nationkey}) - FILTER(condition=RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]) == 1:numeric, columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[o_custkey], order=[(l_receiptdate):asc_last, (l_extendedprice * 1:numeric - l_discount):desc_first]), columns={'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey, 'o_custkey': o_custkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_receiptdate': t1.l_receiptdate, 'l_suppkey': t1.l_suppkey, 'o_custkey': t0.o_custkey}) FILTER(condition=o_clerk == 'Clerk#000000017':string, columns={'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) SCAN(table=tpch.ORDERS, columns={'o_clerk': o_clerk, 'o_custkey': o_custkey, 'o_orderkey': o_orderkey}) diff --git a/tests/test_plan_refsols/singular7.txt b/tests/test_plan_refsols/singular7.txt index 857976233..3097aed50 100644 --- a/tests/test_plan_refsols/singular7.txt +++ b/tests/test_plan_refsols/singular7.txt @@ -3,7 +3,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', anything_p_name), ('n_ord FILTER(condition=s_nationkey == 20:numeric, columns={'s_name': s_name, 's_suppkey': s_suppkey}) SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) PROJECT(columns={'anything_p_name': anything_p_name, 'n_orders': DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric), 'ps_suppkey': ps_suppkey}) - FILTER(condition=RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]) == 1:numeric, columns={'anything_p_name': anything_p_name, 'count_l_suppkey': 
count_l_suppkey, 'ps_suppkey': ps_suppkey}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ps_suppkey], order=[(DEFAULT_TO(KEEP_IF(count_l_suppkey, count_l_suppkey != 0:numeric), 0:numeric)):desc_first, (anything_p_name):asc_last]), columns={'anything_p_name': anything_p_name, 'count_l_suppkey': count_l_suppkey, 'ps_suppkey': ps_suppkey}) AGGREGATE(keys={'l_partkey': ps_partkey, 'ps_suppkey': ps_suppkey}, aggregations={'anything_p_name': ANYTHING(p_name), 'count_l_suppkey': COUNT(l_suppkey)}) JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_suppkey': t1.l_suppkey, 'p_name': t0.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) JOIN(condition=t0.ps_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'p_name': t1.p_name, 'ps_partkey': t0.ps_partkey, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/sqlite_udf_cumulative_distribution.txt b/tests/test_plan_refsols/sqlite_udf_cumulative_distribution.txt index 938ae5d84..2bfabae50 100644 --- a/tests/test_plan_refsols/sqlite_udf_cumulative_distribution.txt +++ b/tests/test_plan_refsols/sqlite_udf_cumulative_distribution.txt @@ -1,5 +1,5 @@ ROOT(columns=[('c', c), ('n', n_rows)], orderings=[(c):asc_first]) AGGREGATE(keys={'c': c}, aggregations={'n_rows': COUNT()}) PROJECT(columns={'c': ROUND(CUMULATIVE_DISTRIBUTION(args=[], partition=[], order=[(o_orderpriority):asc_last]), 4:numeric)}) - FILTER(condition=YEAR(o_orderdate) == 1998:numeric | o_orderpriority == '2-HIGH':string & YEAR(o_orderdate) == 1992:numeric, columns={'o_orderpriority': o_orderpriority}) + FILTER(condition=YEAR(o_orderdate) == 1992:numeric & o_orderpriority == '2-HIGH':string | YEAR(o_orderdate) == 1998:numeric, columns={'o_orderpriority': o_orderpriority}) SCAN(table=tpch.ORDERS, columns={'o_orderdate': o_orderdate, 'o_orderpriority': 
o_orderpriority}) diff --git a/tests/test_plan_refsols/supplier_best_part.txt b/tests/test_plan_refsols/supplier_best_part.txt index 4b815e991..57f2bace8 100644 --- a/tests/test_plan_refsols/supplier_best_part.txt +++ b/tests/test_plan_refsols/supplier_best_part.txt @@ -4,7 +4,7 @@ ROOT(columns=[('supplier_name', s_name), ('part_name', p_name), ('total_quantity SCAN(table=tpch.SUPPLIER, columns={'s_name': s_name, 's_nationkey': s_nationkey, 's_suppkey': s_suppkey}) FILTER(condition=n_name == 'FRANCE':string, columns={'n_nationkey': n_nationkey}) SCAN(table=tpch.NATION, columns={'n_name': n_name, 'n_nationkey': n_nationkey}) - FILTER(condition=RANKING(args=[], partition=[l_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False) == 1:numeric, columns={'l_suppkey': l_suppkey, 'n_rows': n_rows, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[l_suppkey], order=[(DEFAULT_TO(sum_l_quantity, 0:numeric)):desc_first], allow_ties=False), columns={'l_suppkey': l_suppkey, 'n_rows': n_rows, 'p_name': p_name, 'sum_l_quantity': sum_l_quantity}) JOIN(condition=t0.l_partkey == t1.p_partkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_suppkey': t0.l_suppkey, 'n_rows': t0.n_rows, 'p_name': t1.p_name, 'sum_l_quantity': t0.sum_l_quantity}) AGGREGATE(keys={'l_partkey': l_partkey, 'l_suppkey': l_suppkey}, aggregations={'n_rows': COUNT(), 'sum_l_quantity': SUM(l_quantity)}) FILTER(condition=YEAR(l_shipdate) == 1994:numeric & l_tax == 0:numeric, columns={'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/technograph_country_combination_analysis.txt b/tests/test_plan_refsols/technograph_country_combination_analysis.txt index 915f73bcc..72da660e6 100644 --- a/tests/test_plan_refsols/technograph_country_combination_analysis.txt +++ 
b/tests/test_plan_refsols/technograph_country_combination_analysis.txt @@ -1,12 +1,12 @@ ROOT(columns=[('factory_country', co_name), ('purchase_country', name_2), ('ir', ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric))], orderings=[(ROUND(DEFAULT_TO(sum_n_rows, 0:numeric) / DEFAULT_TO(n_rows, 0:numeric), 2:numeric)):desc_last], limit=5:numeric) - JOIN(condition=t0.co_id == t1.anything_co_id & t0._id_1 == t1.anything__id_3, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) + JOIN(condition=t0._id_1 == t1.anything__id_3 & t0.co_id == t1.anything_co_id, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'co_name': t0.co_name, 'n_rows': t1.n_rows, 'name_2': t0.name_2, 'sum_n_rows': t1.sum_n_rows}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_1': t1.co_id, 'co_id': t0.co_id, 'co_name': t0.co_name, 'name_2': t1.co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'anything__id_3': anything__id_3, 'anything_co_id': anything_co_id}, aggregations={'n_rows': COUNT(), 'sum_n_rows': SUM(KEEP_IF(count_in_device_id, count_in_device_id != 0:numeric))}) AGGREGATE(keys={'in_device_id': de_id}, aggregations={'anything__id_3': ANYTHING(_id_3), 'anything_co_id': ANYTHING(co_id), 'count_in_device_id': COUNT(in_device_id)}) JOIN(condition=t0.de_id == t1.in_device_id, type=LEFT, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t0.de_id, 'in_device_id': t1.in_device_id}) - JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t1.de_production_country_id == t0.co_id, type=INNER, cardinality=PLURAL_FILTER, 
reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) + JOIN(condition=t0._id_3 == t1.de_purchase_country_id & t0.co_id == t1.de_production_country_id, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'_id_3': t0._id_3, 'co_id': t0.co_id, 'de_id': t1.de_id}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=PLURAL_ACCESS, columns={'_id_3': t1.co_id, 'co_id': t0.co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) SCAN(table=main.COUNTRIES, columns={'co_id': co_id}) diff --git a/tests/test_plan_refsols/technograph_hot_purchase_window.txt b/tests/test_plan_refsols/technograph_hot_purchase_window.txt index 8e8497536..0c10fa4ce 100644 --- a/tests/test_plan_refsols/technograph_hot_purchase_window.txt +++ b/tests/test_plan_refsols/technograph_hot_purchase_window.txt @@ -1,6 +1,6 @@ ROOT(columns=[('start_of_period', ca_dt), ('n_purchases', n_rows)], orderings=[(n_rows):desc_last, (ca_dt):asc_first], limit=1:numeric) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.calendar_day_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t1.ca_dt < DATETIME(t0.ca_dt, '+5 days':string) & t1.ca_dt >= t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=YEAR(ca_dt) == 2024:numeric, columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) diff --git a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt index 
9dc81f52d..583f85de7 100644 --- a/tests/test_plan_refsols/technograph_monthly_incident_rate.txt +++ b/tests/test_plan_refsols/technograph_monthly_incident_rate.txt @@ -6,7 +6,7 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year_ca_dt, LPAD(month_ca_dt, 2 SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_production_country_id == t1.co_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.calendar_day_1 == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.calendar_day_1, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_production_country_id': t1.de_production_country_id}) JOIN(condition=t1.ca_dt >= DATETIME(t0.ca_dt, '-6 months':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'calendar_day_1': t1.ca_dt}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) @@ -16,7 +16,7 @@ ROOT(columns=[('month', JOIN_STRINGS('-':string, year_ca_dt, LPAD(month_ca_dt, 2 SCAN(table=main.COUNTRIES, columns={'co_id': co_id, 'co_name': co_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + 
JOIN(condition=DATETIME(t1.in_error_report_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) FILTER(condition=ISIN(YEAR(ca_dt), [2020, 2021]:array[unknown]), columns={'ca_dt': ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt index 990c9c24d..a7ab5ddb9 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_goldcopperstar.txt @@ -10,7 +10,7 @@ ROOT(columns=[('years_since_release', year_ca_dt - YEAR(anything_pr_release)), ( AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) JOIN(condition=t0.in_device_id == t1.de_id, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) + JOIN(condition=DATETIME(t1.in_error_report_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'in_device_id': t1.in_device_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_device_id': in_device_id, 'in_error_report_ts': in_error_report_ts}) 
SCAN(table=main.DEVICES, columns={'de_id': de_id, 'de_product_id': de_product_id}) @@ -18,7 +18,7 @@ ROOT(columns=[('years_since_release', year_ca_dt - YEAR(anything_pr_release)), ( SCAN(table=main.PRODUCTS, columns={'pr_id': pr_id, 'pr_name': pr_name}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.de_product_id == t1.pr_id, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'ca_dt': t0.ca_dt}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt, 'de_product_id': t1.de_product_id}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_product_id': de_product_id, 'de_purchase_ts': de_purchase_ts}) FILTER(condition=pr_name == 'GoldCopper-Star':string, columns={'pr_id': pr_id}) diff --git a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt index 7c5da2407..f07758b50 100644 --- a/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt +++ b/tests/test_plan_refsols/technograph_year_cumulative_incident_rate_overall.txt @@ -5,10 +5,10 @@ ROOT(columns=[('yr', year_ca_dt), ('cum_ir', ROUND(RELSUM(args=[DEFAULT_TO(sum_n JOIN(condition=t0.ca_dt == t1.ca_dt, type=LEFT, cardinality=SINGULAR_FILTER, reverse_cardinality=SINGULAR_ACCESS, columns={'ca_dt': t0.ca_dt, 'n_rows': t1.n_rows}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.de_purchase_ts, 'start of day':string), type=INNER, 
cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=DATETIME(t1.de_purchase_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.DEVICES, columns={'de_purchase_ts': de_purchase_ts}) AGGREGATE(keys={'ca_dt': ca_dt}, aggregations={'n_rows': COUNT()}) - JOIN(condition=t0.ca_dt == DATETIME(t1.in_error_report_ts, 'start of day':string), type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) + JOIN(condition=DATETIME(t1.in_error_report_ts, 'start of day':string) == t0.ca_dt, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'ca_dt': t0.ca_dt}) SCAN(table=main.CALENDAR, columns={'ca_dt': ca_dt}) SCAN(table=main.INCIDENTS, columns={'in_error_report_ts': in_error_report_ts}) diff --git a/tests/test_plan_refsols/time_threshold_reached.txt b/tests/test_plan_refsols/time_threshold_reached.txt index ba32986b7..aa53c4294 100644 --- a/tests/test_plan_refsols/time_threshold_reached.txt +++ b/tests/test_plan_refsols/time_threshold_reached.txt @@ -1,5 +1,5 @@ ROOT(columns=[('date_time', sbTxDateTime)], orderings=[(sbTxDateTime):asc_first]) - FILTER(condition=RANKING(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(pct_of_day):asc_last], allow_ties=False) == 1:numeric, columns={'sbTxDateTime': sbTxDateTime}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[(pct_of_day):asc_last], allow_ties=False), columns={'sbTxDateTime': sbTxDateTime}) FILTER(condition=pct_of_day >= 50.0:numeric, columns={'pct_of_day': pct_of_day, 'sbTxDateTime': sbTxDateTime}) PROJECT(columns={'pct_of_day': 100.0:numeric * RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], 
order=[(sbTxDateTime):asc_last], cumulative=True) / RELSUM(args=[sbTxShares], partition=[DATETIME(sbTxDateTime, 'start of day':string)], order=[]), 'sbTxDateTime': sbTxDateTime}) FILTER(condition=YEAR(sbTxDateTime) == 2023:numeric, columns={'sbTxDateTime': sbTxDateTime, 'sbTxShares': sbTxShares}) diff --git a/tests/test_plan_refsols/top_lineitems_info_2.txt b/tests/test_plan_refsols/top_lineitems_info_2.txt index aff71c57c..0e5939387 100644 --- a/tests/test_plan_refsols/top_lineitems_info_2.txt +++ b/tests/test_plan_refsols/top_lineitems_info_2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('order_key', l_orderkey), ('line_number', l_linenumber), ('part_size', p_size), ('supplier_nation', n_nationkey)], orderings=[(l_orderkey):asc_first, (l_linenumber):asc_first], limit=7:numeric) - JOIN(condition=t0.ps_partkey == t1.l_partkey & t0.supplier_key_11 == t1.l_suppkey & t1.l_partkey == t0.p_partkey & t1.l_suppkey == t0.ps_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'n_nationkey': t0.n_nationkey, 'p_size': t0.p_size}) + JOIN(condition=t0.p_partkey == t1.l_partkey & t0.ps_partkey == t1.l_partkey & t0.ps_suppkey == t1.l_suppkey & t0.supplier_key_11 == t1.l_suppkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t1.l_linenumber, 'l_orderkey': t1.l_orderkey, 'n_nationkey': t0.n_nationkey, 'p_size': t0.p_size}) JOIN(condition=t0.s_suppkey == t1.ps_suppkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t0.ps_suppkey, 'supplier_key_11': t1.ps_suppkey}) JOIN(condition=t0.n_nationkey == t1.s_nationkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t0.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 
'ps_suppkey': t0.ps_suppkey, 's_suppkey': t1.s_suppkey}) JOIN(condition=True:bool, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_ACCESS, columns={'n_nationkey': t1.n_nationkey, 'p_partkey': t0.p_partkey, 'p_size': t0.p_size, 'ps_suppkey': t0.ps_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q19.txt b/tests/test_plan_refsols/tpch_q19.txt index 709ee1ba4..9142a031f 100644 --- a/tests/test_plan_refsols/tpch_q19.txt +++ b/tests/test_plan_refsols/tpch_q19.txt @@ -1,6 +1,6 @@ ROOT(columns=[('REVENUE', DEFAULT_TO(sum_expr, 0:numeric))], orderings=[]) AGGREGATE(keys={}, aggregations={'sum_expr': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_partkey == t1.p_partkey & MONOTONIC(1:numeric, t1.p_size, 5:numeric) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & t1.p_brand == 'Brand#12':string | MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric) & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & t1.p_brand == 'Brand#23':string | MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & t1.p_brand == 'Brand#34':string, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) + JOIN(condition=t0.l_partkey == t1.p_partkey & t1.p_brand == 'Brand#34':string & ISIN(t1.p_container, ['LG CASE', 'LG BOX', 'LG PACK', 'LG PKG']:array[unknown]) & MONOTONIC(1:numeric, t1.p_size, 15:numeric) & MONOTONIC(20:numeric, t0.l_quantity, 30:numeric) | t1.p_brand == 'Brand#12':string & ISIN(t1.p_container, ['SM CASE', 'SM BOX', 'SM PACK', 'SM PKG']:array[unknown]) & MONOTONIC(1:numeric, t0.l_quantity, 11:numeric) & MONOTONIC(1:numeric, t1.p_size, 5:numeric) | t1.p_brand == 
'Brand#23':string & ISIN(t1.p_container, ['MED BAG', 'MED BOX', 'MED PACK', 'MED PKG']:array[unknown]) & MONOTONIC(1:numeric, t1.p_size, 10:numeric) & MONOTONIC(10:numeric, t0.l_quantity, 20:numeric), type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice}) FILTER(condition=l_shipinstruct == 'DELIVER IN PERSON':string & ISIN(l_shipmode, ['AIR', 'AIR REG']:array[unknown]), columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity}) SCAN(table=tpch.LINEITEM, columns={'l_discount': l_discount, 'l_extendedprice': l_extendedprice, 'l_partkey': l_partkey, 'l_quantity': l_quantity, 'l_shipinstruct': l_shipinstruct, 'l_shipmode': l_shipmode}) SCAN(table=tpch.PART, columns={'p_brand': p_brand, 'p_container': p_container, 'p_partkey': p_partkey, 'p_size': p_size}) diff --git a/tests/test_plan_refsols/tpch_q2.txt b/tests/test_plan_refsols/tpch_q2.txt index 5f3a777cd..cf3b6ec04 100644 --- a/tests/test_plan_refsols/tpch_q2.txt +++ b/tests/test_plan_refsols/tpch_q2.txt @@ -1,5 +1,5 @@ ROOT(columns=[('S_ACCTBAL', s_acctbal), ('S_NAME', s_name), ('N_NAME', n_name), ('P_PARTKEY', p_partkey), ('P_MFGR', p_mfgr), ('S_ADDRESS', s_address), ('S_PHONE', s_phone), ('S_COMMENT', s_comment)], orderings=[(s_acctbal):desc_last, (n_name):asc_first, (s_name):asc_first, (p_partkey):asc_first], limit=10:numeric) - FILTER(condition=RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True) == 1:numeric, columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[ps_partkey], order=[(ps_supplycost):asc_last], allow_ties=True), columns={'n_name': n_name, 'p_mfgr': p_mfgr, 'p_partkey': p_partkey, 's_acctbal': s_acctbal, 's_address': 
s_address, 's_comment': s_comment, 's_name': s_name, 's_phone': s_phone}) JOIN(condition=t0.ps_suppkey == t1.s_suppkey, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'n_name': t1.n_name, 'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t0.ps_partkey, 'ps_supplycost': t0.ps_supplycost, 's_acctbal': t1.s_acctbal, 's_address': t1.s_address, 's_comment': t1.s_comment, 's_name': t1.s_name, 's_phone': t1.s_phone}) JOIN(condition=t0.p_partkey == t1.ps_partkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'p_mfgr': t0.p_mfgr, 'p_partkey': t0.p_partkey, 'ps_partkey': t1.ps_partkey, 'ps_suppkey': t1.ps_suppkey, 'ps_supplycost': t1.ps_supplycost}) FILTER(condition=p_size == 15:numeric & ENDSWITH(p_type, 'BRASS':string), columns={'p_mfgr': p_mfgr, 'p_partkey': p_partkey}) diff --git a/tests/test_plan_refsols/tpch_q21.txt b/tests/test_plan_refsols/tpch_q21.txt index e1b29a494..66e7a976c 100644 --- a/tests/test_plan_refsols/tpch_q21.txt +++ b/tests/test_plan_refsols/tpch_q21.txt @@ -8,13 +8,13 @@ ROOT(columns=[('S_NAME', anything_s_name), ('NUMWAIT', DEFAULT_TO(KEEP_IF(count_ JOIN(condition=t0.l_linenumber == t1.l_linenumber & t0.l_orderkey == t1.l_orderkey & t0.o_orderkey == t1.l_orderkey, type=ANTI, columns={'anything_l_suppkey': t0.anything_l_suppkey}) FILTER(condition=anything_o_orderstatus == 'F':string, columns={'anything_l_suppkey': anything_l_suppkey, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}) AGGREGATE(keys={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'o_orderkey': o_orderkey}, aggregations={'anything_l_suppkey': ANYTHING(l_suppkey), 'anything_o_orderstatus': ANYTHING(o_orderstatus)}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': 
t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) + JOIN(condition=t0.l_suppkey != t1.l_suppkey & t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t0.o_orderkey, 'o_orderstatus': t0.o_orderstatus}) JOIN(condition=t0.l_orderkey == t1.o_orderkey, type=INNER, cardinality=SINGULAR_ACCESS, reverse_cardinality=PLURAL_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey, 'l_suppkey': t0.l_suppkey, 'o_orderkey': t1.o_orderkey, 'o_orderstatus': t1.o_orderstatus}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) SCAN(table=tpch.ORDERS, columns={'o_orderkey': o_orderkey, 'o_orderstatus': o_orderstatus}) SCAN(table=tpch.LINEITEM, columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) - JOIN(condition=t1.l_suppkey != t0.l_suppkey & t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) + JOIN(condition=t0.l_suppkey != t1.l_suppkey & t0.l_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'l_linenumber': t0.l_linenumber, 'l_orderkey': t0.l_orderkey}) FILTER(condition=l_receiptdate > l_commitdate, columns={'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) SCAN(table=tpch.LINEITEM, columns={'l_commitdate': l_commitdate, 'l_linenumber': l_linenumber, 'l_orderkey': l_orderkey, 'l_receiptdate': l_receiptdate, 'l_suppkey': l_suppkey}) FILTER(condition=l_receiptdate > l_commitdate, 
columns={'l_orderkey': l_orderkey, 'l_suppkey': l_suppkey}) diff --git a/tests/test_plan_refsols/tpch_q5.txt b/tests/test_plan_refsols/tpch_q5.txt index 6e4183624..c1f939b52 100644 --- a/tests/test_plan_refsols/tpch_q5.txt +++ b/tests/test_plan_refsols/tpch_q5.txt @@ -1,6 +1,6 @@ ROOT(columns=[('N_NAME', anything_n_name), ('REVENUE', DEFAULT_TO(sum_value, 0:numeric))], orderings=[(DEFAULT_TO(sum_value, 0:numeric)):desc_last]) AGGREGATE(keys={'n_nationkey': n_nationkey}, aggregations={'anything_n_name': ANYTHING(n_name), 'sum_value': SUM(l_extendedprice * 1:numeric - l_discount)}) - JOIN(condition=t0.l_suppkey == t1.s_suppkey & t1.n_name == t0.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) + JOIN(condition=t0.l_suppkey == t1.s_suppkey & t0.n_name == t1.n_name, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_FILTER, columns={'l_discount': t0.l_discount, 'l_extendedprice': t0.l_extendedprice, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.o_orderkey == t1.l_orderkey, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'l_discount': t1.l_discount, 'l_extendedprice': t1.l_extendedprice, 'l_suppkey': t1.l_suppkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) JOIN(condition=t0.c_custkey == t1.o_custkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey, 'o_orderkey': t1.o_orderkey}) JOIN(condition=t0.n_nationkey == t1.c_nationkey, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'c_custkey': t1.c_custkey, 'n_name': t0.n_name, 'n_nationkey': t0.n_nationkey}) diff --git a/tests/test_plan_refsols/user_range_collection_4.txt b/tests/test_plan_refsols/user_range_collection_4.txt index 46b108cb7..7e1625f3b 
100644 --- a/tests/test_plan_refsols/user_range_collection_4.txt +++ b/tests/test_plan_refsols/user_range_collection_4.txt @@ -1,6 +1,6 @@ ROOT(columns=[('part_size', part_size), ('name', p_name), ('retail_price', p_retailprice)], orderings=[(part_size):asc_first]) - FILTER(condition=RANKING(args=[], partition=[part_size], order=[(p_retailprice):asc_last], allow_ties=False) == 1:numeric, columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'part_size': part_size}) - JOIN(condition=t1.p_size == t0.part_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'part_size': t0.part_size}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[part_size], order=[(p_retailprice):asc_last], allow_ties=False), columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'part_size': part_size}) + JOIN(condition=t0.part_size == t1.p_size, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'p_name': t1.p_name, 'p_retailprice': t1.p_retailprice, 'part_size': t0.part_size}) GENERATED_TABLE(RangeCollection('sizes', part_size=range(1, 11))) FILTER(condition=CONTAINS(p_container, 'SM DRUM':string) & CONTAINS(p_name, 'azure':string) & CONTAINS(p_type, 'PLATED':string), columns={'p_name': p_name, 'p_retailprice': p_retailprice, 'p_size': p_size}) SCAN(table=tpch.PART, columns={'p_container': p_container, 'p_name': p_name, 'p_retailprice': p_retailprice, 'p_size': p_size, 'p_type': p_type}) diff --git a/tests/test_plan_refsols/wealthiest_supplier.txt b/tests/test_plan_refsols/wealthiest_supplier.txt index 30e45ccd2..fa050e49f 100644 --- a/tests/test_plan_refsols/wealthiest_supplier.txt +++ b/tests/test_plan_refsols/wealthiest_supplier.txt @@ -1,3 +1,3 @@ ROOT(columns=[('name', s_name), ('account_balance', s_acctbal)], orderings=[]) - FILTER(condition=RANKING(args=[], partition=[], order=[(s_acctbal):desc_first, (s_name):asc_last], allow_ties=False) == 
1:numeric, columns={'s_acctbal': s_acctbal, 's_name': s_name}) + FILTER(condition=1:numeric == RANKING(args=[], partition=[], order=[(s_acctbal):desc_first, (s_name):asc_last], allow_ties=False), columns={'s_acctbal': s_acctbal, 's_name': s_name}) SCAN(table=tpch.SUPPLIER, columns={'s_acctbal': s_acctbal, 's_name': s_name}) diff --git a/tests/test_sql_refsols/correl_34_sqlite.sql b/tests/test_sql_refsols/correl_34_sqlite.sql index 7a92e3032..ec37c07c6 100644 --- a/tests/test_sql_refsols/correl_34_sqlite.sql +++ b/tests/test_sql_refsols/correl_34_sqlite.sql @@ -14,8 +14,8 @@ WITH _s13 AS ( orders.o_totalprice, partsupp.ps_partkey, partsupp.ps_suppkey, - AVG(CAST(orders.o_totalprice AS REAL)) OVER (PARTITION BY lineitem.l_linenumber, lineitem.l_orderkey, partsupp.ps_partkey, partsupp.ps_suppkey) AS _w, - COUNT(*) OVER (PARTITION BY lineitem.l_partkey, lineitem.l_suppkey) AS _w_2 + COUNT(*) OVER (PARTITION BY lineitem.l_partkey, lineitem.l_suppkey) AS _w, + AVG(CAST(orders.o_totalprice AS REAL)) OVER (PARTITION BY lineitem.l_linenumber, lineitem.l_orderkey, partsupp.ps_partkey, partsupp.ps_suppkey) AS _w_2 FROM tpch.partsupp AS partsupp JOIN tpch.supplier AS supplier ON partsupp.ps_suppkey = supplier.s_suppkey @@ -40,7 +40,7 @@ WITH _s13 AS ( FROM _t WHERE ( - _w < o_totalprice OR _w_2 = 1 + _w = 1 OR _w_2 < o_totalprice ) AND l_orderkey = o_orderkey AND l_partkey = ps_partkey diff --git a/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql b/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql index 30fcdcb3b..d750b8bd4 100644 --- a/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql +++ b/tests/test_sql_refsols/cryptbank_filter_count_34_rewrite_sqlite.sql @@ -2,4 +2,17 @@ SELECT COUNT(*) AS n FROM crbnk.accounts WHERE - a_open_ts = '2015-05-04 18:01:51' + CASE + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 3 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS 
INTEGER) >= 1 + THEN 1 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 6 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 4 + THEN 2 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 9 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 7 + THEN 3 + WHEN CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) <= 12 + AND CAST(STRFTIME('%m', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) >= 10 + THEN 4 + END = CAST(STRFTIME('%d', DATETIME(a_open_ts, '+123456789 seconds')) AS INTEGER) diff --git a/tests/test_sql_refsols/simplification_4_mysql.sql b/tests/test_sql_refsols/simplification_4_mysql.sql index 897fd00f1..bdff7e8d1 100644 --- a/tests/test_sql_refsols/simplification_4_mysql.sql +++ b/tests/test_sql_refsols/simplification_4_mysql.sql @@ -1,8 +1,8 @@ WITH _t AS ( SELECT sbtxdatetime AS sbTxDateTime, - ROW_NUMBER() OVER (ORDER BY CASE WHEN sbtxdatetime IS NULL THEN 1 ELSE 0 END, sbtxdatetime) AS _w, - ROW_NUMBER() OVER (ORDER BY CASE WHEN sbtxdatetime IS NULL THEN 1 ELSE 0 END DESC, sbtxdatetime DESC) AS _w_2 + ROW_NUMBER() OVER (ORDER BY CASE WHEN sbtxdatetime IS NULL THEN 1 ELSE 0 END DESC, sbtxdatetime DESC) AS _w, + ROW_NUMBER() OVER (ORDER BY CASE WHEN sbtxdatetime IS NULL THEN 1 ELSE 0 END, sbtxdatetime) AS _w_2 FROM main.sbTransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS DATETIME)) = 2023 diff --git a/tests/test_sql_refsols/simplification_4_postgres.sql b/tests/test_sql_refsols/simplification_4_postgres.sql index 580a859fe..a69bb1acc 100644 --- a/tests/test_sql_refsols/simplification_4_postgres.sql +++ b/tests/test_sql_refsols/simplification_4_postgres.sql @@ -1,8 +1,8 @@ WITH _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (ORDER BY sbtxdatetime) AS _w, - ROW_NUMBER() OVER (ORDER BY sbtxdatetime DESC) AS _w_2 + ROW_NUMBER() OVER (ORDER BY sbtxdatetime DESC) AS _w, + ROW_NUMBER() 
OVER (ORDER BY sbtxdatetime) AS _w_2 FROM main.sbtransaction WHERE EXTRACT(YEAR FROM CAST(sbtxdatetime AS TIMESTAMP)) = 2023 diff --git a/tests/test_sql_refsols/simplification_4_sqlite.sql b/tests/test_sql_refsols/simplification_4_sqlite.sql index ad602230b..d6d4b0b3b 100644 --- a/tests/test_sql_refsols/simplification_4_sqlite.sql +++ b/tests/test_sql_refsols/simplification_4_sqlite.sql @@ -1,8 +1,8 @@ WITH _t AS ( SELECT sbtxdatetime, - ROW_NUMBER() OVER (ORDER BY sbtxdatetime) AS _w, - ROW_NUMBER() OVER (ORDER BY sbtxdatetime DESC) AS _w_2 + ROW_NUMBER() OVER (ORDER BY sbtxdatetime DESC) AS _w, + ROW_NUMBER() OVER (ORDER BY sbtxdatetime) AS _w_2 FROM main.sbtransaction WHERE CAST(STRFTIME('%Y', sbtxdatetime) AS INTEGER) = 2023 From 0a34d04db92fe381dfb969b5f29ef271a394e946 Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Tue, 10 Feb 2026 00:28:12 -0800 Subject: [PATCH 21/22] [RUN CI] --- pydough/conversion/hybrid_translator.py | 4 ++-- pydough/relational/relational_expressions/call_expression.py | 5 ----- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index 21d554f5c..09b1d0ea7 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -1739,11 +1739,11 @@ def convert_qdag_to_hybrid(self, node: PyDoughCollectionQDAG) -> HybridTree: self.run_correlation_extraction(hybrid) # 5. Run the de-correlation procedure. self.run_hybrid_decorrelation(hybrid) - # 5. Run the filter-merging procedure, then re-run ejecting aggregate + # 6. Run the filter-merging procedure, then re-run ejecting aggregate # inputs to clean up any new aggregates created by filter merging. self.run_filter_merging(hybrid) self.eject_aggregate_inputs(hybrid) - # 7. Run any final rewrites, such as turning MEDIAN into an average + # 8. Run any final rewrites, such as turning MEDIAN into an average # of the 1-2 median rows, that must happen after de-correlation. 
self.run_rewrites(hybrid) return hybrid diff --git a/pydough/relational/relational_expressions/call_expression.py b/pydough/relational/relational_expressions/call_expression.py index 3a5573d5d..4802404c0 100644 --- a/pydough/relational/relational_expressions/call_expression.py +++ b/pydough/relational/relational_expressions/call_expression.py @@ -50,11 +50,6 @@ def __init__( if self.op in (BAN, BOR, EQU, NEQ): self._inputs.sort(key=repr) - # If the operator is a commutative operation (AND, OR, EQUAL, NOT-EQUAL) - # first sort the inputs to normalize them. - if self.op in (BAN, BOR, EQU, NEQ): - self._inputs.sort(key=repr) - @property def op(self) -> PyDoughExpressionOperator: """ From cb9cb36fc19aef5e4777f0aa91a1abe97299796c Mon Sep 17 00:00:00 2001 From: knassre-bodo Date: Wed, 11 Feb 2026 10:02:20 -0800 Subject: [PATCH 22/22] WIP --- .../conversion/relational_simplification.py | 172 +++++++++++++++++- .../relational_expressions/call_expression.py | 2 +- .../defog_restaurants_gen14_sqlite.sql | 5 +- 3 files changed, 173 insertions(+), 6 deletions(-) diff --git a/pydough/conversion/relational_simplification.py b/pydough/conversion/relational_simplification.py index 103de3e2d..7b46278e1 100644 --- a/pydough/conversion/relational_simplification.py +++ b/pydough/conversion/relational_simplification.py @@ -1164,7 +1164,69 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(True, expr.data_type) - output_predicates.not_negative = True + # If any of the two arguments are opposites, replace the entire + # AND expression with False. For example: + # (x >= y) & (x < y) -> False + for i in range(len(expr.inputs)): + for j in range(i + 1, len(expr.inputs)): + if are_opposites(expr.inputs[i], expr.inputs[j]): + output_expr = LiteralExpression(False, expr.data_type) + + # If all the arguments are ANDs that have overlap, create an OR + # between the overlapping parts vs the rest of the arguments + # that are AND-ed together. 
+ # For example: (x | y) & (x | z) -> x | (y & z) + if all( + isinstance(arg, CallExpression) and arg.op == pydop.BOR + for arg in expr.inputs + ): + common_args: set[RelationalExpression] = set.intersection( + *[ + set(arg.inputs) + for arg in expr.inputs + if isinstance(arg, CallExpression) + ] + ) + if len(common_args) > 0: + common_arg_expr: RelationalExpression + if len(common_args) == 1: + common_arg_expr = next(iter(common_args)) + else: + common_arg_expr = CallExpression( + pydop.BOR, expr.data_type, list(common_args) + ) + conjunctions: list[RelationalExpression] = [] + for arg in expr.inputs: + assert isinstance(arg, CallExpression) + conj_args: list[RelationalExpression] = list( + set(arg.inputs) - common_args + ) + if len(conj_args) == 0: + continue + elif len(conj_args) == 1: + conjunctions.append(next(iter(conj_args))) + else: + conjunctions.append( + CallExpression(pydop.BAN, expr.data_type, conj_args) + ) + + disjunction_expr: RelationalExpression + if len(conjunctions) == 0: + output_expr = common_arg_expr + else: + if len(conjunctions) == 1: + disjunction_expr = next(iter(conjunctions)) + else: + disjunction_expr = CallExpression( + pydop.BOR, expr.data_type, conjunctions + ) + output_expr = CallExpression( + pydop.BAN, + expr.data_type, + [common_arg_expr, disjunction_expr], + ) + + output_predicates.not_negative = False # X | Y is True if any of the arguments are Truth-y literals, and False # if all of the arguments are False-y literals. @@ -1180,6 +1242,70 @@ def simplify_function_call( for arg in expr.inputs ): output_expr = LiteralExpression(False, expr.data_type) + # If any of the two arguments are opposites, replace the entire + # OR expression with True. 
For example: + # (x >= y) | (x < y) -> True + for i in range(len(expr.inputs)): + for j in range(i + 1, len(expr.inputs)): + if are_opposites(expr.inputs[i], expr.inputs[j]): + output_expr = LiteralExpression(True, expr.data_type) + output_predicates.positive = True + break + + # If all the arguments are ORs that have overlap, create an AND + # between the overlapping parts vs the rest of the arguments + # that are OR-ed together. + # For example: (x & y) | (x & z) -> x & (y | z) + if all( + isinstance(arg, CallExpression) and arg.op == pydop.BAN + for arg in expr.inputs + ): + common_args = set.intersection( + *[ + set(arg.inputs) + for arg in expr.inputs + if isinstance(arg, CallExpression) + ] + ) + breakpoint() + if len(common_args) > 0: + if len(common_args) == 1: + common_arg_expr = next(iter(common_args)) + else: + common_arg_expr = CallExpression( + pydop.BAN, expr.data_type, list(common_args) + ) + disjunctions: list[RelationalExpression] = [] + for arg in expr.inputs: + assert isinstance(arg, CallExpression) + disj_args: list[RelationalExpression] = list( + set(arg.inputs) - common_args + ) + if len(disj_args) == 0: + continue + elif len(disj_args) == 1: + disjunctions.append(next(iter(disj_args))) + else: + disjunctions.append( + CallExpression(pydop.BOR, expr.data_type, disj_args) + ) + + conjunction_expr: RelationalExpression + if len(disjunctions) == 0: + output_expr = common_arg_expr + else: + if len(disjunctions) == 1: + conjunction_expr = next(iter(disjunctions)) + else: + conjunction_expr = CallExpression( + pydop.BOR, expr.data_type, disjunctions + ) + output_expr = CallExpression( + pydop.BOR, + expr.data_type, + [common_arg_expr, conjunction_expr], + ) + output_predicates.not_negative = True # NOT(x) is True if x is a False-y literal, and False if x is a @@ -1761,3 +1887,47 @@ def simplify_expressions( """ simplifier: SimplificationVisitor = SimplificationVisitor(session) node.accept(simplifier) + + +def are_opposites(expr1: 
RelationalExpression, expr2: RelationalExpression) -> bool: + """ + Helper function to determine whether two expressions are opposites of each + other. For example, x >= y and x < y are opposites, as are x = y and + x != y. + + Args: + `expr1`: The first expression to compare. + `expr2`: The second expression to compare. + + Returns: + A boolean indicating whether the two expressions are opposites of each + other. + """ + if not (isinstance(expr1, CallExpression) and isinstance(expr2, CallExpression)): + return False + match (expr1.op, expr2.op): + # e.g. (a = b) is the opposite of (a != b) + case (pydop.EQU, pydop.NEQ) | (pydop.NEQ, pydop.EQU): + return set(expr1.inputs) == set(expr2.inputs) + + # e.g. (a < b) is the opposite of (a >= b) + case ( + (pydop.LET, pydop.GEQ) + | (pydop.LEQ, pydop.GRT) + | (pydop.GEQ, pydop.LET) + | (pydop.GRT, pydop.LEQ) + ): + return expr1.inputs == expr2.inputs + + # e.g. (a < b) is the opposite of (b <= a) + case ( + (pydop.LET, pydop.LEQ) + | (pydop.LEQ, pydop.LET) + | (pydop.GEQ, pydop.GRT) + | (pydop.GRT, pydop.GEQ) + ): + return expr1.inputs == list(reversed(expr2.inputs)) + + # TODO: add more cases + case _: + return False diff --git a/pydough/relational/relational_expressions/call_expression.py b/pydough/relational/relational_expressions/call_expression.py index 4802404c0..758cfb8d7 100644 --- a/pydough/relational/relational_expressions/call_expression.py +++ b/pydough/relational/relational_expressions/call_expression.py @@ -48,7 +48,7 @@ def __init__( # If the operator is a commutative operation (AND, OR, EQUAL, NOT-EQUAL) # first sort the inputs to normalize them. 
if self.op in (BAN, BOR, EQU, NEQ): - self._inputs.sort(key=repr) + self._inputs = sorted(set(self._inputs), key=repr) @property def op(self) -> PyDoughExpressionOperator: diff --git a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql index d03d55431..543bb54af 100644 --- a/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql +++ b/tests/test_sql_refsols/defog_restaurants_gen14_sqlite.sql @@ -2,7 +2,4 @@ SELECT CAST(SUM(LOWER(food_type) = 'vegan') AS REAL) / NULLIF(SUM(LOWER(food_type) <> 'vegan'), 0) AS ratio FROM main.restaurant WHERE - LOWER(city_name) = 'san francisco' - AND ( - LOWER(food_type) <> 'vegan' OR LOWER(food_type) = 'vegan' - ) + LOWER(city_name) = 'san francisco' AND LOWER(food_type) <> 'vegan'