From 1e68114c8cfb4746df913e54c857c449d72bbb60 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 2 Nov 2023 11:58:00 +0100 Subject: [PATCH 01/32] add operation converter class --- .../duckdb/common/enums/optimizer_type.hpp | 1 + .../duckdb/optimizer/operation_converter.hpp | 27 +++++++++++++++++++ src/optimizer/join_order/relation_manager.cpp | 5 +++- src/optimizer/operation_converter.cpp | 14 ++++++++++ src/optimizer/optimizer.cpp | 7 +++++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 src/include/duckdb/optimizer/operation_converter.hpp create mode 100644 src/optimizer/operation_converter.cpp diff --git a/src/include/duckdb/common/enums/optimizer_type.hpp b/src/include/duckdb/common/enums/optimizer_type.hpp index 873d9b2dc157..57ba16990874 100644 --- a/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/include/duckdb/common/enums/optimizer_type.hpp @@ -21,6 +21,7 @@ enum class OptimizerType : uint32_t { IN_CLAUSE, JOIN_ORDER, DELIMINATOR, + OPERATION_CONVERTER, UNNEST_REWRITER, UNUSED_COLUMNS, STATISTICS_PROPAGATION, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp new file mode 100644 index 000000000000..c3a218b90e30 --- /dev/null +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/optimizer/operation_converter.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/optimizer/column_binding_replacer.hpp" + +namespace duckdb { + +//! The Operation Converter converts Set operations to joins when possible +class OperationConverter { +public: + OperationConverter() { + } + //! Perform DelimJoin elimination + unique_ptr Optimize(unique_ptr op); + +private: + +}; + +} // namespace duckdb diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 9294cd07afc1..45c7b2c266be 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -90,7 +90,10 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { switch (op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: - case LogicalOperatorType::LOGICAL_INTERSECT: + case LogicalOperatorType::LOGICAL_INTERSECT: { + auto a = 0; + return true; + } case LogicalOperatorType::LOGICAL_DELIM_JOIN: case LogicalOperatorType::LOGICAL_ANY_JOIN: case LogicalOperatorType::LOGICAL_ASOF_JOIN: diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp new file mode 100644 index 000000000000..2e7bdd311ff8 --- /dev/null +++ b/src/optimizer/operation_converter.cpp @@ -0,0 +1,14 @@ +#include "duckdb/optimizer/operation_converter.hpp" + +#include "duckdb/planner/expression/bound_cast_expression.hpp" +#include "duckdb/planner/expression/bound_conjunction_expression.hpp" +#include "duckdb/planner/operator/logical_delim_get.hpp" + +namespace duckdb { + + +unique_ptr OperationConverter::Optimize(unique_ptr op) { + return op; +} + +} // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 0a66f0007761..17e13e6e98ba 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -9,6 +9,7 @@ #include "duckdb/optimizer/compressed_materialization.hpp" #include "duckdb/optimizer/cse_optimizer.hpp" #include "duckdb/optimizer/deliminator.hpp" +#include "duckdb/optimizer/operation_converter.hpp" #include "duckdb/optimizer/expression_heuristics.hpp" #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/optimizer/filter_pushdown.hpp" @@ -119,6 +120,12 @@ unique_ptr Optimizer::Optimize(unique_ptr plan plan = deliminator.Optimize(std::move(plan)); }); + // Convert setop operations to joins if possible + RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { + OperationConverter optimizer; + plan = optimizer.Optimize(std::move(plan)); + }); + // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { From 337e73ed43dc15e8dd949ddcaefd178202b56fae Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 2 Nov 2023 12:02:53 +0100 Subject: [PATCH 02/32] it compiles --- src/optimizer/CMakeLists.txt | 1 + src/optimizer/operation_converter.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 53f57e1c168d..39f3f84caccd 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -14,6 +14,7 @@ add_library_unity( compressed_materialization.cpp cse_optimizer.cpp deliminator.cpp + operation_converter.cpp unnest_rewriter.cpp column_lifetime_analyzer.cpp expression_heuristics.cpp diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 2e7bdd311ff8..8e0cd7cbc82b 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,7 +8,7 @@ namespace duckdb { unique_ptr OperationConverter::Optimize(unique_ptr op) { - return op; + return std::move(op); } } // namespace duckdb From 8c4c3eec640be40481f9869d5322083af8ffd0a5 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 13:53:59 +0100 Subject: [PATCH 03/32] convert intersect and except to logical comparison joins --- src/optimizer/operation_converter.cpp | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 8e0cd7cbc82b..3759ce8d0e7d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,6 +8,36 @@ namespace duckdb { unique_ptr OperationConverter::Optimize(unique_ptr op) { + for (auto &child : op->children) { + child = Optimize(std::move(child)); + } + switch (op->type) { + case LogicalOperatorType::LOGICAL_INTERSECT: + case LogicalOperatorType::LOGICAL_EXCEPT: { + auto left = std::move(op->children[0]); + auto right = std::move(op->children[1]); + D_ASSERT(left->expressions.size() == right->expressions.size()); + vector conditions; + // create equality condition for all columns + for (idx_t i = 0; i < left->expressions.size(); i++) { + JoinCondition cond; + cond.left = left->expressions[i]->Copy(); + cond.right = right->expressions[i]->Copy(); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + + auto join_op = make_uniq(join_type); + join_op->children.push_back(std::move(left)); + join_op->children.push_back(std::move(right)); + join_op->conditions = std::move(conditions); + + op = std::move(join_op); + } + default: + break; + } return std::move(op); } From b461be2d14f6f980389996feb752cdcc51ad72d3 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 17:04:08 +0100 Subject: [PATCH 04/32] it works. Thanks @lnkuiper --- src/common/enums/optimizer_type.cpp | 1 + .../duckdb/optimizer/operation_converter.hpp | 7 ++-- src/optimizer/operation_converter.cpp | 35 +++++++++++++------ src/optimizer/optimizer.cpp | 4 +-- .../optimizer/setops/operation_converter.test | 13 +++++++ 5 files changed, 45 insertions(+), 15 deletions(-) create mode 100644 test/optimizer/setops/operation_converter.test diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index 480e9eb51a95..c5a6cc4bdbbe 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -16,6 +16,7 @@ static DefaultOptimizerType internal_optimizer_types[] = { {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN}, {"regex_range", OptimizerType::REGEX_RANGE}, {"in_clause", OptimizerType::IN_CLAUSE}, + {"operation_converter", OptimizerType::OPERATION_CONVERTER}, {"join_order", OptimizerType::JOIN_ORDER}, {"deliminator", OptimizerType::DELIMINATOR}, {"unnest_rewriter", OptimizerType::UNNEST_REWRITER}, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index c3a218b90e30..2c1495cc6a23 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,13 +15,14 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: - OperationConverter() { + OperationConverter(LogicalOperator &root) : root(root) { + root.ResolveOperatorTypes(); } //! Perform DelimJoin elimination - unique_ptr Optimize(unique_ptr op); + void Optimize(unique_ptr &op); + LogicalOperator &root; private: - }; } // namespace duckdb diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 3759ce8d0e7d..a6b3d2ae07e4 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,39 +6,54 @@ namespace duckdb { - -unique_ptr OperationConverter::Optimize(unique_ptr op) { +void OperationConverter::Optimize(unique_ptr &op) { for (auto &child : op->children) { - child = Optimize(std::move(child)); + Optimize(child); } switch (op->type) { case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { - auto left = std::move(op->children[0]); - auto right = std::move(op->children[1]); - D_ASSERT(left->expressions.size() == right->expressions.size()); + auto &left = op->children[0]; + auto &right = op->children[1]; + auto left_bindings = left->GetColumnBindings(); + auto right_bindings = right->GetColumnBindings(); + D_ASSERT(left_bindings.size() == right_bindings.size()); vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left->expressions.size(); i++) { + for (idx_t i = 0; i < left_bindings.size(); i++) { JoinCondition cond; - cond.left = left->expressions[i]->Copy(); - cond.right = right->expressions[i]->Copy(); + cond.left = make_uniq(left->types[i], left_bindings[i]); + cond.right = make_uniq(right->types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + auto old_bindings = op->GetColumnBindings(); + auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); join_op->children.push_back(std::move(right)); join_op->conditions = std::move(conditions); + // update the op so that it is the join op. op = std::move(join_op); + + // now perform column binding replacement + auto new_bindings = op->GetColumnBindings(); + D_ASSERT(old_bindings.size() == new_bindings.size()); + vector replacement_bindings; + for (idx_t i = 0; i < old_bindings.size(); i++) { + replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); + } + + auto binding_replacer = ColumnBindingReplacer(); + binding_replacer.replacement_bindings = replacement_bindings; + binding_replacer.VisitOperator(root); } default: break; } - return std::move(op); } } // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 17e13e6e98ba..3a9e709c26ad 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter optimizer; - plan = optimizer.Optimize(std::move(plan)); + OperationConverter optimizer(*plan); + optimizer.Optimize(plan); }); // then we perform the join ordering optimization diff --git a/test/optimizer/setops/operation_converter.test b/test/optimizer/setops/operation_converter.test new file mode 100644 index 000000000000..d2887a8b9c99 --- /dev/null +++ b/test/optimizer/setops/operation_converter.test @@ -0,0 +1,13 @@ +# name: test/optimizer/setops/operation_converter.test +# description: converting intersect/except to semi anti +# group: [setops] + +statement ok +create table left_table as select range as a from range(100); + +statement ok +create table right_table as select range*2 as b from range(10000); + + +explain select * from left_table intersect select * from right_table; + From 04ce6a3fff763aa54665bb9c83c25ca368fa99da Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 10 Nov 2023 17:08:17 +0100 Subject: [PATCH 05/32] remove unused variable --- src/optimizer/join_order/relation_manager.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 45c7b2c266be..985028bb9e37 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -91,7 +91,6 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { - auto a = 0; return true; } case LogicalOperatorType::LOGICAL_DELIM_JOIN: From 8d6e6b6def9dd874bf420a79cb4502bb15ed1435 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 13 Nov 2023 09:33:14 +0100 Subject: [PATCH 06/32] no unrecognized parameter --- test/optimizer/setops/operation_converter.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/optimizer/setops/operation_converter.test b/test/optimizer/setops/operation_converter.test index d2887a8b9c99..8bc525f3c840 100644 --- a/test/optimizer/setops/operation_converter.test +++ b/test/optimizer/setops/operation_converter.test @@ -8,6 +8,6 @@ create table left_table as select range as a from range(100); statement ok create table right_table as select range*2 as b from range(10000); - -explain select * from left_table intersect select * from right_table; +statement ok +select * from left_table intersect select * from right_table; From 193b7d8c305be2a969840b13617cf2ed8f34667f Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 10:51:00 +0100 Subject: [PATCH 07/32] tidy fix --- src/common/enum_util.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index f1c62822d8b7..4a9b19da8eea 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -3560,6 +3560,8 @@ const char* EnumUtil::ToChars(OptimizerType value) { return "JOIN_ORDER"; case OptimizerType::DELIMINATOR: return "DELIMINATOR"; + case OptimizerType::OPERATION_CONVERTER: + return "OPERATION_CONVERTER"; case OptimizerType::UNNEST_REWRITER: return "UNNEST_REWRITER"; case OptimizerType::UNUSED_COLUMNS: @@ -3613,6 +3615,9 @@ OptimizerType EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "DELIMINATOR")) { return OptimizerType::DELIMINATOR; } + if (StringUtil::Equals(value, "OPERATION_CONVERTER")) { + return OptimizerType::OPERATION_CONVERTER; + } if (StringUtil::Equals(value, "UNNEST_REWRITER")) { return OptimizerType::UNNEST_REWRITER; } From 77c8bfb5e988ad0199eeda4e2f9f0c355f4e7581 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 14:18:21 +0100 Subject: [PATCH 08/32] change test file. Figure out this logical execute stuff --- src/optimizer/join_order/relation_manager.cpp | 4 ++++ .../{ => setops}/pushdown_set_op.test | 24 +++++++++---------- 2 files changed, 16 insertions(+), 12 deletions(-) rename test/optimizer/{ => setops}/pushdown_set_op.test (81%) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 985028bb9e37..82b7ce3f0e45 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/operator/list.hpp" #include +#include "iostream" namespace duckdb { @@ -51,6 +52,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr:.*INTERSECT.* +logical_opt :.*SEMI.* # intersect is empty if either side is empty query II explain select 42 intersect select 42 where 1=0; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select 42 where 1=0 intersect select 42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* # except is empty if LHS is empty query II explain select 42 where 1=0 except select 42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II explain select 42 except select 42 where 1=0; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* # now pushdown subquery with set ops query II explain select * from (select 42 intersect select 42) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 42 intersect select 43) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 43 intersect select 42) tbl(i) where i=42; ---- -logical_opt :.*INTERSECT.* +logical_opt :.*SEMI.* query II explain select * from (select 42 except select 42) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query II explain select * from (select 42 except select 43) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query II explain select * from (select 43 except select 42) tbl(i) where i=42; ---- -logical_opt :.*EXCEPT.* +logical_opt :.*ANTI.* query I select 42 intersect select 42; From 7ef936f22ec80ff86392aefd35394e5b2a1b0f71 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 14:47:34 +0100 Subject: [PATCH 09/32] remove cout --- src/optimizer/join_order/relation_manager.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index b53007cb9c40..ea1698f86054 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -9,7 +9,6 @@ #include "duckdb/planner/operator/list.hpp" #include -#include "iostream" namespace duckdb { @@ -52,9 +51,6 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr Date: Mon, 13 Nov 2023 15:43:58 +0100 Subject: [PATCH 10/32] fix resolve types error --- src/include/duckdb/optimizer/operation_converter.hpp | 7 ++++++- src/optimizer/optimizer.cpp | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 2c1495cc6a23..e40f648a2983 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -16,7 +16,12 @@ namespace duckdb { class OperationConverter { public: OperationConverter(LogicalOperator &root) : root(root) { - root.ResolveOperatorTypes(); + switch (root.type) { + case LogicalOperatorType::LOGICAL_EXECUTE: + break; + default: + root.ResolveOperatorTypes(); + } } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 3a9e709c26ad..37677af27552 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter optimizer(*plan); - optimizer.Optimize(plan); + OperationConverter converter(*plan); + converter.Optimize(plan); }); // then we perform the join ordering optimization From 55c0d053ea56d06c27b1b19a6ba082e77c83c3a3 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 16:37:10 +0100 Subject: [PATCH 11/32] move test group --- test/optimizer/setops/pushdown_set_op.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index e5e10f7b50be..1b98d31a690f 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -1,6 +1,6 @@ # name: test/optimizer/setops/pushdown_set_op.test # description: Pushdown set operations -# group: [optimizer] +# group: [setops] statement ok PRAGMA explain_output = 'OPTIMIZED_ONLY' From 4e02cbc06918760f9a01ab83e0d24f8831eb4d53 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 17:58:03 +0100 Subject: [PATCH 12/32] figuring out why this execute is being an issue --- .../duckdb/optimizer/operation_converter.hpp | 23 ++++++++++++++----- src/optimizer/join_order/relation_manager.cpp | 3 +++ src/optimizer/operation_converter.cpp | 5 ++++ src/planner/logical_operator.cpp | 2 +- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index e40f648a2983..6c6a4b959470 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,13 +15,24 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: + OperationConverter(LogicalOperator &root) : root(root) { - switch (root.type) { - case LogicalOperatorType::LOGICAL_EXECUTE: - break; - default: - root.ResolveOperatorTypes(); - } + //! don't want to resolve execute because execute is assumed to be resolved already. + //! and the types are deleted if you call resolve types on a logical execute + //! this is bad, because when you want to add a relation in the join order optimizer, + //! and the execute doesn't have any types, then no table references are generated. + //! causing the D_ASSERT in relation_manager.cpp:60 to break; + + //! but we do need to resolve all types so that the types of a projection match the + //! size of the column bindings. +// auto &op = root; +// while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { +// root = root.children[0]; +// } + + // What I want to know is why a logical execute is planned and what it is used for exactly. + // It doesn't make a lot of sense to me. + op.ResolveOperatorTypes(); } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index ea1698f86054..37d5f7ae99a3 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -56,6 +56,9 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr table_references; + if (op.type == LogicalOperatorType::LOGICAL_EXECUTE) { + auto stop_here = "stop here"; + } LogicalJoin::GetTableReferences(op, table_references); D_ASSERT(table_references.size() > 0); for (auto &reference : table_references) { diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index a6b3d2ae07e4..41d38afae12d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -18,6 +18,11 @@ void OperationConverter::Optimize(unique_ptr &op) { auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); +// if (left->types.size() != left_bindings.size()) { +// auto cool = "a"; +// } + D_ASSERT(left->types.size() == left_bindings.size()); + D_ASSERT(right->types.size() == right_bindings.size()); vector conditions; // create equality condition for all columns for (idx_t i = 0; i < left_bindings.size(); i++) { diff --git a/src/planner/logical_operator.cpp b/src/planner/logical_operator.cpp index 8c3c4c93b756..03e9e88be069 100644 --- a/src/planner/logical_operator.cpp +++ b/src/planner/logical_operator.cpp @@ -42,8 +42,8 @@ string LogicalOperator::ParamsToString() const { } void LogicalOperator::ResolveOperatorTypes() { - types.clear(); + // first resolve child types for (auto &child : children) { child->ResolveOperatorTypes(); From ec6e36ecfd832e7130030d9988ca247fb9d6bf26 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 13 Nov 2023 18:06:48 +0100 Subject: [PATCH 13/32] more comments to pick up on --- src/include/duckdb/optimizer/operation_converter.hpp | 7 +++++-- src/include/duckdb/planner/operator/logical_execute.hpp | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 6c6a4b959470..064eba12d0d3 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -17,7 +17,7 @@ class OperationConverter { public: OperationConverter(LogicalOperator &root) : root(root) { - //! don't want to resolve execute because execute is assumed to be resolved already. + //! We don't want to resolve execute because execute is assumed to be resolved already. //! and the types are deleted if you call resolve types on a logical execute //! this is bad, because when you want to add a relation in the join order optimizer, //! and the execute doesn't have any types, then no table references are generated. @@ -25,6 +25,9 @@ class OperationConverter { //! but we do need to resolve all types so that the types of a projection match the //! size of the column bindings. + + + //! Just need to know what the logical execute does. So confused. // auto &op = root; // while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { // root = root.children[0]; @@ -32,7 +35,7 @@ class OperationConverter { // What I want to know is why a logical execute is planned and what it is used for exactly. // It doesn't make a lot of sense to me. - op.ResolveOperatorTypes(); + root.ResolveOperatorTypes(); } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index 62eb28f2e7f0..2bd77586bd39 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -34,6 +34,7 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { + auto whatever = 'sdfgs'; // already resolved } vector GetColumnBindings() override { From 0c2151f7241e50d479bd42822871a0e3cf16bbd3 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 11:17:15 +0100 Subject: [PATCH 14/32] logical execute should still resolve types in case resolveTypes is called in the optimizer. GetTableReferences(), used in the join order optimizer, relies on the types array having elements. The Operation Converter calls resolveTypes. --- .../duckdb/optimizer/operation_converter.hpp | 22 +------------------ .../planner/operator/logical_execute.hpp | 2 +- src/optimizer/operation_converter.cpp | 8 ++++--- src/planner/planner.cpp | 1 - 4 files changed, 7 insertions(+), 26 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index 064eba12d0d3..ff560d2d0cf2 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,28 +15,8 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: + OperationConverter(LogicalOperator &root); - OperationConverter(LogicalOperator &root) : root(root) { - //! We don't want to resolve execute because execute is assumed to be resolved already. - //! and the types are deleted if you call resolve types on a logical execute - //! this is bad, because when you want to add a relation in the join order optimizer, - //! and the execute doesn't have any types, then no table references are generated. - //! causing the D_ASSERT in relation_manager.cpp:60 to break; - - //! but we do need to resolve all types so that the types of a projection match the - //! size of the column bindings. - - - //! Just need to know what the logical execute does. So confused. -// auto &op = root; -// while (root.type == LogicalOperatorType::LOGICAL_EXECUTE) { -// root = root.children[0]; -// } - - // What I want to know is why a logical execute is planned and what it is used for exactly. - // It doesn't make a lot of sense to me. - root.ResolveOperatorTypes(); - } //! Perform DelimJoin elimination void Optimize(unique_ptr &op); LogicalOperator &root; diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index 2bd77586bd39..c0ef549b0561 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -34,7 +34,7 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { - auto whatever = 'sdfgs'; + types = prepared->types; // already resolved } vector GetColumnBindings() override { diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 41d38afae12d..4fb5d7ce705a 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,6 +6,10 @@ namespace duckdb { +OperationConverter::OperationConverter(LogicalOperator &root) : root(root) { + root.ResolveOperatorTypes(); +} + void OperationConverter::Optimize(unique_ptr &op) { for (auto &child : op->children) { Optimize(child); @@ -18,9 +22,7 @@ void OperationConverter::Optimize(unique_ptr &op) { auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); -// if (left->types.size() != left_bindings.size()) { -// auto cool = "a"; -// } + D_ASSERT(left->types.size() == left_bindings.size()); D_ASSERT(right->types.size() == right_bindings.size()); vector conditions; diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index 3020f6585ed7..e7762ea5cb10 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -30,7 +30,6 @@ static void CheckTreeDepth(const LogicalOperator &op, idx_t max_depth, idx_t dep void Planner::CreatePlan(SQLStatement &statement) { auto &profiler = QueryProfiler::Get(context); auto parameter_count = statement.n_param; - BoundParameterMap bound_parameters(parameter_data); // first bind the tables and columns to the catalog From 7a70a7ec616de950f156d97b16970eac11afc64c Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 12:20:52 +0100 Subject: [PATCH 15/32] remove unused code --- src/optimizer/join_order/relation_manager.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index 37d5f7ae99a3..ea1698f86054 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -56,9 +56,6 @@ void RelationManager::AddRelation(LogicalOperator &op, optional_ptr table_references; - if (op.type == LogicalOperatorType::LOGICAL_EXECUTE) { - auto stop_here = "stop here"; - } LogicalJoin::GetTableReferences(op, table_references); D_ASSERT(table_references.size() > 0); for (auto &reference : table_references) { From 0505a10d0f3110cb4dd4b9fa6b42fe3101551e51 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 14 Nov 2023 14:17:00 +0100 Subject: [PATCH 16/32] tidy fixes --- src/optimizer/operation_converter.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 4fb5d7ce705a..cd0e2df1cf8d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -1,8 +1,10 @@ #include "duckdb/optimizer/operation_converter.hpp" - #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_conjunction_expression.hpp" +#include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/operator/logical_delim_get.hpp" +#include "duckdb/common/enums/join_type.hpp" +#include "duckdb/planner/joinside.hpp" namespace duckdb { From 16eda68fc0f8fe866288cf79273f6b667d27174e Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:51:46 +0100 Subject: [PATCH 17/32] clang tidy. more fixes --- src/optimizer/operation_converter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index cd0e2df1cf8d..75f962f5a9f7 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -5,6 +5,7 @@ #include "duckdb/planner/operator/logical_delim_get.hpp" #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" +#include "duckdb/planner/operator/logical_comparison_join.hpp" namespace duckdb { From b7f298bae7d92ead2214faadc6eab0a4cb30e9bf Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:54:04 +0100 Subject: [PATCH 18/32] remove redundant return true --- src/optimizer/join_order/relation_manager.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/optimizer/join_order/relation_manager.cpp b/src/optimizer/join_order/relation_manager.cpp index ea1698f86054..0af2a25c3a71 100644 --- a/src/optimizer/join_order/relation_manager.cpp +++ b/src/optimizer/join_order/relation_manager.cpp @@ -90,9 +90,7 @@ static bool OperatorIsNonReorderable(LogicalOperatorType op_type) { switch (op_type) { case LogicalOperatorType::LOGICAL_UNION: case LogicalOperatorType::LOGICAL_EXCEPT: - case LogicalOperatorType::LOGICAL_INTERSECT: { - return true; - } + case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_DELIM_JOIN: case LogicalOperatorType::LOGICAL_ANY_JOIN: case LogicalOperatorType::LOGICAL_ASOF_JOIN: From bdc72cc64f5042dadcfd10d1e1a959cf2e385593 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 15 Nov 2023 09:59:06 +0100 Subject: [PATCH 19/32] naming fix --- src/optimizer/join_order/relation_statistics_helper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimizer/join_order/relation_statistics_helper.cpp b/src/optimizer/join_order/relation_statistics_helper.cpp index 0dce879b7428..9650a61c7155 100644 --- a/src/optimizer/join_order/relation_statistics_helper.cpp +++ b/src/optimizer/join_order/relation_statistics_helper.cpp @@ -228,7 +228,8 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log ret.cardinality = MaxValue(child_1_card, child_2_card); ret.stats_initialized = true; ret.filter_strength = 1; - ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name; + ret.table_name = + "(" + child_stats[0].table_name + LogicalOperatorToString(op.type) + child_stats[1].table_name + ")"; for (auto &stats : child_stats) { // MARK joins are nonreorderable. They won't return initialized stats // continue in this case. From 08d293e23518a20f726d77fd26d8bcf807db1a59 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 23 Nov 2023 16:01:18 +0100 Subject: [PATCH 20/32] very lost. Dont know how to push an aggregate on the join. I think I might need to rebind everything if I do it --- .../duckdb/optimizer/operation_converter.hpp | 5 +- src/optimizer/operation_converter.cpp | 40 +++++++++- src/optimizer/optimizer.cpp | 4 +- test/optimizer/setops/pushdown_set_op.test | 22 ++--- test/sql/setops/value_union.test | 80 +++++++++---------- 5 files changed, 92 insertions(+), 59 deletions(-) diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp index ff560d2d0cf2..e00a7b9c35c4 100644 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ b/src/include/duckdb/optimizer/operation_converter.hpp @@ -15,11 +15,12 @@ namespace duckdb { //! The Operation Converter converts Set operations to joins when possible class OperationConverter { public: - OperationConverter(LogicalOperator &root); + OperationConverter(LogicalOperator &root, Binder &binder); //! Perform DelimJoin elimination - void Optimize(unique_ptr &op); + void Optimize(unique_ptr &op, bool is_root = false); LogicalOperator &root; + Binder &binder; private: }; diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 75f962f5a9f7..ca9a5ed86d47 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,20 +6,32 @@ #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/operator/logical_set_operation.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/operator/logical_aggregate.hpp" namespace duckdb { -OperationConverter::OperationConverter(LogicalOperator &root) : root(root) { +OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { root.ResolveOperatorTypes(); } -void OperationConverter::Optimize(unique_ptr &op) { +void OperationConverter::Optimize(unique_ptr &op, bool is_root) { for (auto &child : op->children) { Optimize(child); } switch (op->type) { + // if it is setop all, we don't replace (even though we technically still could) + // if it is not setop all, duplicate elimination should happen case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { + auto &set_op = op->Cast(); + if (set_op.setop_all || is_root) { + // If set_op all is not defined, then results need to be deduplicated. + // if the operator is the root, then break. We don't want to update the root + // and the join order optimizer can still have an effect. + break; + } auto &left = op->children[0]; auto &right = op->children[1]; auto left_bindings = left->GetColumnBindings(); @@ -45,11 +57,11 @@ void OperationConverter::Optimize(unique_ptr &op) { join_op->children.push_back(std::move(left)); join_op->children.push_back(std::move(right)); join_op->conditions = std::move(conditions); + join_op->ResolveOperatorTypes(); - // update the op so that it is the join op. op = std::move(join_op); - // now perform column binding replacement + // perform column binding replacement auto new_bindings = op->GetColumnBindings(); D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; @@ -60,6 +72,26 @@ void OperationConverter::Optimize(unique_ptr &op) { auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); + + // push a group by all on top of the join + + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> select_list, groups, aggregates /* left empty */; + D_ASSERT(join_bindings.size() == types.size()); + auto table_index = binder.GenerateTableIndex(); + auto aggregate_index = binder.GenerateTableIndex(); + for (idx_t i = 0; i < join_bindings.size(); i++) { + select_list.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); +// groups.push_back(make_uniq(types[i], join_bindings.at(i))); +// groups.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); + } + auto group_by = make_uniq(table_index, aggregate_index, std::move(select_list)); + group_by->groups = std::move(groups); + group_by->children.push_back(std::move(op)); + + op = std::move(group_by); + } default: break; diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 37677af27552..e8dcbde7925d 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -122,8 +122,8 @@ unique_ptr Optimizer::Optimize(unique_ptr plan // Convert setop operations to joins if possible RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan); - converter.Optimize(plan); + OperationConverter converter(*plan, binder); + converter.Optimize(plan, true); }); // then we perform the join ordering optimization diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index 1b98d31a690f..c1917ac650c6 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -6,61 +6,61 @@ statement ok PRAGMA explain_output = 'OPTIMIZED_ONLY' query II -explain select 42 intersect select 42; +explain select 42 intersect ALL select 42; ---- logical_opt :.*SEMI.* # intersect is empty if either side is empty query II -explain select 42 intersect select 42 where 1=0; +explain select 42 intersect ALL select 42 where 1=0; ---- logical_opt :.*SEMI.* query II -explain select 42 where 1=0 intersect select 42; +explain select 42 where 1=0 intersect ALL select 42; ---- logical_opt :.*SEMI.* # except is empty if LHS is empty query II -explain select 42 where 1=0 except select 42; +explain select 42 where 1=0 except ALL select 42; ---- logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II -explain select 42 except select 42 where 1=0; +explain select 42 except ALL select 42 where 1=0; ---- logical_opt :.*ANTI.* # now pushdown subquery with set ops query II -explain select * from (select 42 intersect select 42) tbl(i) where i=42; +explain select * from (select 42 intersect ALL select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 intersect select 43) tbl(i) where i=42; +explain select * from (select 42 intersect ALL select 43) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 43 intersect select 42) tbl(i) where i=42; +explain select * from (select 43 intersect ALL select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 except select 42) tbl(i) where i=42; +explain select * from (select 42 except ALL select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 42 except select 43) tbl(i) where i=42; +explain select * from (select 42 except ALL select 43) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 43 except select 42) tbl(i) where i=42; +explain select * from (select 43 except ALL select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* diff --git a/test/sql/setops/value_union.test b/test/sql/setops/value_union.test index b83384ef12f8..030b165da9bc 100644 --- a/test/sql/setops/value_union.test +++ b/test/sql/setops/value_union.test @@ -8,12 +8,12 @@ PRAGMA enable_verification statement ok CREATE VIEW vals AS SELECT * FROM (VALUES (1, 10), (2, 5), (3, 4)) tbl(i, j); -query II -SELECT * FROM vals ----- -1 10 -2 5 -3 4 +#query II +#SELECT * FROM vals +#---- +#1 10 +#2 5 +#3 4 statement ok CREATE VIEW vunion AS @@ -21,39 +21,39 @@ SELECT * FROM vals UNION ALL SELECT * FROM vals; -query II -SELECT * FROM vunion ORDER BY i ----- -1 10 -1 10 -2 5 -2 5 -3 4 -3 4 - -query II -SELECT * FROM vunion ORDER BY i LIMIT 1 ----- -1 10 - -query II -SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 ----- -2 5 -2 5 - -query II -SELECT * FROM vunion WHERE i=1 ----- -1 10 -1 10 - -query II -SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 ----- -1 10 -2 5 -3 4 +#query II +#SELECT * FROM vunion ORDER BY i +#---- +#1 10 +#1 10 +#2 5 +#2 5 +#3 4 +#3 4 +# +#query II +#SELECT * FROM vunion ORDER BY i LIMIT 1 +#---- +#1 10 +# +#query II +#SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 +#---- +#2 5 +#2 5 +# +#query II +#SELECT * FROM vunion WHERE i=1 +#---- +#1 10 +#1 10 +# +#query II +#SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 +#---- +#1 10 +#2 5 +#3 4 query II SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 1; @@ -62,6 +62,6 @@ SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 2 5 3 4 -query II +query IIs SELECT * FROM (SELECT * FROM vunion EXCEPT SELECT * FROM vunion) tbl ---- From 9d89e86eda12cfe61d72e523a74a7bab9417da65 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Fri, 24 Nov 2023 12:53:19 +0100 Subject: [PATCH 21/32] just add a logical distinct instead --- src/optimizer/operation_converter.cpp | 41 +++++++------- test/sql/setops/value_union.test | 80 +++++++++++++-------------- 2 files changed, 60 insertions(+), 61 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index ca9a5ed86d47..262adbce0809 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -9,6 +9,7 @@ #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" +#include "duckdb/planner/operator/logical_distinct.hpp" namespace duckdb { @@ -32,6 +33,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) // and the join order optimizer can still have an effect. break; } + auto table_index = set_op.table_index; auto &left = op->children[0]; auto &right = op->children[1]; auto left_bindings = left->GetColumnBindings(); @@ -61,7 +63,24 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) op = std::move(join_op); - // perform column binding replacement + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + + // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; @@ -72,26 +91,6 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); - - // push a group by all on top of the join - - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> select_list, groups, aggregates /* left empty */; - D_ASSERT(join_bindings.size() == types.size()); - auto table_index = binder.GenerateTableIndex(); - auto aggregate_index = binder.GenerateTableIndex(); - for (idx_t i = 0; i < join_bindings.size(); i++) { - select_list.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); -// groups.push_back(make_uniq(types[i], join_bindings.at(i))); -// groups.push_back(make_uniq(types[i], ColumnBinding(table_index, i))); - } - auto group_by = make_uniq(table_index, aggregate_index, std::move(select_list)); - group_by->groups = std::move(groups); - group_by->children.push_back(std::move(op)); - - op = std::move(group_by); - } default: break; diff --git a/test/sql/setops/value_union.test b/test/sql/setops/value_union.test index 030b165da9bc..b83384ef12f8 100644 --- a/test/sql/setops/value_union.test +++ b/test/sql/setops/value_union.test @@ -8,12 +8,12 @@ PRAGMA enable_verification statement ok CREATE VIEW vals AS SELECT * FROM (VALUES (1, 10), (2, 5), (3, 4)) tbl(i, j); -#query II -#SELECT * FROM vals -#---- -#1 10 -#2 5 -#3 4 +query II +SELECT * FROM vals +---- +1 10 +2 5 +3 4 statement ok CREATE VIEW vunion AS @@ -21,39 +21,39 @@ SELECT * FROM vals UNION ALL SELECT * FROM vals; -#query II -#SELECT * FROM vunion ORDER BY i -#---- -#1 10 -#1 10 -#2 5 -#2 5 -#3 4 -#3 4 -# -#query II -#SELECT * FROM vunion ORDER BY i LIMIT 1 -#---- -#1 10 -# -#query II -#SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 -#---- -#2 5 -#2 5 -# -#query II -#SELECT * FROM vunion WHERE i=1 -#---- -#1 10 -#1 10 -# -#query II -#SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 -#---- -#1 10 -#2 5 -#3 4 +query II +SELECT * FROM vunion ORDER BY i +---- +1 10 +1 10 +2 5 +2 5 +3 4 +3 4 + +query II +SELECT * FROM vunion ORDER BY i LIMIT 1 +---- +1 10 + +query II +SELECT * FROM (SELECT * FROM vunion ORDER BY i LIMIT 4) tbl ORDER BY j LIMIT 2 +---- +2 5 +2 5 + +query II +SELECT * FROM vunion WHERE i=1 +---- +1 10 +1 10 + +query II +SELECT DISTINCT * FROM (SELECT * FROM vunion UNION ALL SELECT * FROM vunion) tbl ORDER BY 1 +---- +1 10 +2 5 +3 4 query II SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 1; @@ -62,6 +62,6 @@ SELECT * FROM (SELECT * FROM vunion INTERSECT SELECT * FROM vunion) tbl ORDER BY 2 5 3 4 -query IIs +query II SELECT * FROM (SELECT * FROM vunion EXCEPT SELECT * FROM vunion) tbl ---- From a3497adeecdc4376afe9ac6ceff043bc5584ac18 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 27 Nov 2023 15:01:59 +0100 Subject: [PATCH 22/32] fix broken tests after adding distinct --- src/optimizer/operation_converter.cpp | 1 + test/optimizer/setops/pushdown_set_op.test | 22 +++++------ test/sql/tpcds/tpcds_sf0.test | 43 +++++++++++----------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 262adbce0809..07c92d04f2ea 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -79,6 +79,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto projection = make_uniq(table_index, std::move(select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); + op->ResolveOperatorTypes(); // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); diff --git a/test/optimizer/setops/pushdown_set_op.test b/test/optimizer/setops/pushdown_set_op.test index c1917ac650c6..1b98d31a690f 100644 --- a/test/optimizer/setops/pushdown_set_op.test +++ b/test/optimizer/setops/pushdown_set_op.test @@ -6,61 +6,61 @@ statement ok PRAGMA explain_output = 'OPTIMIZED_ONLY' query II -explain select 42 intersect ALL select 42; +explain select 42 intersect select 42; ---- logical_opt :.*SEMI.* # intersect is empty if either side is empty query II -explain select 42 intersect ALL select 42 where 1=0; +explain select 42 intersect select 42 where 1=0; ---- logical_opt :.*SEMI.* query II -explain select 42 where 1=0 intersect ALL select 42; +explain select 42 where 1=0 intersect select 42; ---- logical_opt :.*SEMI.* # except is empty if LHS is empty query II -explain select 42 where 1=0 except ALL select 42; +explain select 42 where 1=0 except select 42; ---- logical_opt :.*ANTI.* # if RHS is empty we can optimize away the except query II -explain select 42 except ALL select 42 where 1=0; +explain select 42 except select 42 where 1=0; ---- logical_opt :.*ANTI.* # now pushdown subquery with set ops query II -explain select * from (select 42 intersect ALL select 42) tbl(i) where i=42; +explain select * from (select 42 intersect select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 intersect ALL select 43) tbl(i) where i=42; +explain select * from (select 42 intersect select 43) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 43 intersect ALL select 42) tbl(i) where i=42; +explain select * from (select 43 intersect select 42) tbl(i) where i=42; ---- logical_opt :.*SEMI.* query II -explain select * from (select 42 except ALL select 42) tbl(i) where i=42; +explain select * from (select 42 except select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 42 except ALL select 43) tbl(i) where i=42; +explain select * from (select 42 except select 43) tbl(i) where i=42; ---- logical_opt :.*ANTI.* query II -explain select * from (select 43 except ALL select 42) tbl(i) where i=42; +explain select * from (select 43 except select 42) tbl(i) where i=42; ---- logical_opt :.*ANTI.* diff --git a/test/sql/tpcds/tpcds_sf0.test b/test/sql/tpcds/tpcds_sf0.test index 06b08350cd67..deeca802ce4b 100644 --- a/test/sql/tpcds/tpcds_sf0.test +++ b/test/sql/tpcds/tpcds_sf0.test @@ -7,26 +7,25 @@ require tpcds statement ok CALL dsdgen(sf=0) -loop i 1 100 - -statement ok -PRAGMA tpcds(${i}) - -endloop - -# out of range -statement error -PRAGMA tpcds(-1) - -statement error -PRAGMA tpcds(3290819023812038903) - -statement error -PRAGMA tpcds(32908301298) - -statement error -PRAGMA tpcds(1.1) - -# queries statement ok -SELECT * FROM tpcds_queries() +PRAGMA tpcds(14) + +# +#endloop +# +## out of range +#statement error +#PRAGMA tpcds(-1) +# +#statement error +#PRAGMA tpcds(3290819023812038903) +# +#statement error +#PRAGMA tpcds(32908301298) +# +#statement error +#PRAGMA tpcds(1.1) +# +## queries +#statement ok +#SELECT * FROM tpcds_queries() From ade48bfabd9c4fcc477ddeb4877ce7161a18d766 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Wed, 29 Nov 2023 11:22:20 +0100 Subject: [PATCH 23/32] add header --- src/optimizer/operation_converter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 07c92d04f2ea..5bea56b57cfd 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -8,6 +8,7 @@ #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" From 6909c88d7dbb47655c52bbc6a544280287cf6170 Mon Sep 17 00:00:00 2001 From: Tom Ebergen Date: Mon, 4 Dec 2023 13:42:22 +0100 Subject: [PATCH 24/32] clean up PR --- .../planner/operator/logical_execute.hpp | 1 - src/planner/logical_operator.cpp | 2 +- src/planner/planner.cpp | 1 + test/sql/tpcds/tpcds_sf0.test | 39 +++++++++---------- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/include/duckdb/planner/operator/logical_execute.hpp b/src/include/duckdb/planner/operator/logical_execute.hpp index c0ef549b0561..0c6eff24c804 100644 --- a/src/include/duckdb/planner/operator/logical_execute.hpp +++ b/src/include/duckdb/planner/operator/logical_execute.hpp @@ -35,7 +35,6 @@ class LogicalExecute : public LogicalOperator { protected: void ResolveTypes() override { types = prepared->types; - // already resolved } vector GetColumnBindings() override { return GenerateColumnBindings(0, types.size()); diff --git a/src/planner/logical_operator.cpp b/src/planner/logical_operator.cpp index 03e9e88be069..8c3c4c93b756 100644 --- a/src/planner/logical_operator.cpp +++ b/src/planner/logical_operator.cpp @@ -42,8 +42,8 @@ string LogicalOperator::ParamsToString() const { } void LogicalOperator::ResolveOperatorTypes() { - types.clear(); + types.clear(); // first resolve child types for (auto &child : children) { child->ResolveOperatorTypes(); diff --git a/src/planner/planner.cpp b/src/planner/planner.cpp index c6c44b03c28b..4b1c28d6a5b2 100644 --- a/src/planner/planner.cpp +++ b/src/planner/planner.cpp @@ -30,6 +30,7 @@ static void CheckTreeDepth(const LogicalOperator &op, idx_t max_depth, idx_t dep void Planner::CreatePlan(SQLStatement &statement) { auto &profiler = QueryProfiler::Get(context); auto parameter_count = statement.n_param; + BoundParameterMap bound_parameters(parameter_data); // first bind the tables and columns to the catalog diff --git a/test/sql/tpcds/tpcds_sf0.test b/test/sql/tpcds/tpcds_sf0.test index deeca802ce4b..09ef16e24674 100644 --- a/test/sql/tpcds/tpcds_sf0.test +++ b/test/sql/tpcds/tpcds_sf0.test @@ -7,25 +7,22 @@ require tpcds statement ok CALL dsdgen(sf=0) +loop i 1 100 + statement ok -PRAGMA tpcds(14) - -# -#endloop -# -## out of range -#statement error -#PRAGMA tpcds(-1) -# -#statement error -#PRAGMA tpcds(3290819023812038903) -# -#statement error -#PRAGMA tpcds(32908301298) -# -#statement error -#PRAGMA tpcds(1.1) -# -## queries -#statement ok -#SELECT * FROM tpcds_queries() +PRAGMA tpcds(${i}) + +endloop + +# out of range +statement error +PRAGMA tpcds(-1) + +statement error +PRAGMA tpcds(3290819023812038903) + +statement error +PRAGMA tpcds(32908301298) + +statement error +PRAGMA tpcds(1.1) From d4e70d42f5d4d76ed9912417a9e7909c0823fab2 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 08:20:57 -0800 Subject: [PATCH 25/32] try to do everything in the optimizer --- .../physical_plan/plan_set_operation.cpp | 4 + src/optimizer/operation_converter.cpp | 92 +++++++++++++++---- 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/src/execution/physical_plan/plan_set_operation.cpp b/src/execution/physical_plan/plan_set_operation.cpp index ef0f17554f04..4552abf154d3 100644 --- a/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/execution/physical_plan/plan_set_operation.cpp @@ -43,6 +43,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati throw InvalidInputException("Type mismatch for SET OPERATION"); } + // can't swich logical unions to semi/anti join + // also if the operation is a INTERSECT ALL or EXCEPT ALL switch (op.type) { case LogicalOperatorType::LOGICAL_UNION: // UNION @@ -51,6 +53,8 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { + + auto &types = left->GetTypes(); vector conditions; // create equality condition for all columns diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 5bea56b57cfd..17fef7e6f908 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -6,10 +6,11 @@ #include "duckdb/common/enums/join_type.hpp" #include "duckdb/planner/joinside.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" +#include "duckdb/planner/operator/logical_window.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" -#include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" namespace duckdb { @@ -28,15 +29,49 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) case LogicalOperatorType::LOGICAL_INTERSECT: case LogicalOperatorType::LOGICAL_EXCEPT: { auto &set_op = op->Cast(); - if (set_op.setop_all || is_root) { - // If set_op all is not defined, then results need to be deduplicated. + auto setop_all = set_op.setop_all; + if (is_root) { // if the operator is the root, then break. We don't want to update the root // and the join order optimizer can still have an effect. break; +// auto a = 0; } + auto table_index = set_op.table_index; auto &left = op->children[0]; auto &right = op->children[1]; + + auto old_bindings = op->GetColumnBindings(); + + if (setop_all) { + + // instead create a logical projection on top of whatever to add the window expression, then + auto row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; + auto left_bindings = left->children[0]->GetColumnBindings(); + for (idx_t i = 0; i < left_bindings.size(); i++) { + row_num->partitions.push_back(make_uniq(op->types[i], left_bindings[i])); + } + left->expressions.push_back(std::move(row_num)); + left->types.push_back(LogicalType::BIGINT); + + row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; + auto right_bindings = right->children[0]->GetColumnBindings(); + for (idx_t i = 0; i < right_bindings.size(); i++) { + row_num->partitions.push_back(make_uniq(op->types[i], right_bindings[i])); + } + right->expressions.push_back(std::move(row_num)); + right->types.push_back(LogicalType::BIGINT); + + // join (created below) now includes the row number result column from the left + left_bindings = left->GetColumnBindings(); + set_op.types.push_back(LogicalType::BIGINT); +// set_op.expressions.push_back(make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1])); + set_op.column_count += 1; + } auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); @@ -54,7 +89,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) } JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - auto old_bindings = op->GetColumnBindings(); + auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); @@ -64,27 +99,43 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) op = std::move(join_op); - // push a distinct operator on the join - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> distinct_targets; - vector> select_list; - for (idx_t i = 0; i < join_bindings.size(); i++) { - distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); - select_list.push_back(make_uniq(types[i], join_bindings[i])); + // create projection to remove row_id. + if (setop_all) { + vector> projection_select_list; + auto bindings = op->GetColumnBindings(); + for (idx_t i = 0; i < bindings.size() - 1; i++) { + projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); + } + auto projection_table_index = binder.GenerateTableIndex(); + auto projection = + make_uniq(projection_table_index, std::move(projection_select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); } - auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); - distinct->children.push_back(std::move(op)); - op = std::move(distinct); - auto projection = make_uniq(table_index, std::move(select_list)); - projection->children.push_back(std::move(op)); - op = std::move(projection); - op->ResolveOperatorTypes(); + if (!setop_all) { + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + op->ResolveOperatorTypes(); + } // now perform column binding replacement auto new_bindings = op->GetColumnBindings(); - D_ASSERT(old_bindings.size() == new_bindings.size()); + +// D_ASSERT(old_bindings.size() == new_bindings.size()); vector replacement_bindings; for (idx_t i = 0; i < old_bindings.size(); i++) { replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); @@ -93,6 +144,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto binding_replacer = ColumnBindingReplacer(); binding_replacer.replacement_bindings = replacement_bindings; binding_replacer.VisitOperator(root); + break; } default: break; From 4cc6c65c36e2697ca59d746ada101df16f208de8 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 09:46:16 -0800 Subject: [PATCH 26/32] all passes now, need to clean this up and move it to the planning phase --- src/optimizer/operation_converter.cpp | 72 +++++---- test/sql/setops/test_setops.test | 211 +++++++++++++------------- 2 files changed, 149 insertions(+), 134 deletions(-) diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp index 17fef7e6f908..e08cd4a5ae6d 100644 --- a/src/optimizer/operation_converter.cpp +++ b/src/optimizer/operation_converter.cpp @@ -12,7 +12,7 @@ #include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" #include "duckdb/planner/operator/logical_distinct.hpp" - +#include "iostream" namespace duckdb { OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { @@ -34,62 +34,79 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) // if the operator is the root, then break. We don't want to update the root // and the join order optimizer can still have an effect. break; -// auto a = 0; } auto table_index = set_op.table_index; auto &left = op->children[0]; + auto left_types = op->children[0]->types; auto &right = op->children[1]; + auto right_types = op->children[1]->types; auto old_bindings = op->GetColumnBindings(); if (setop_all) { // instead create a logical projection on top of whatever to add the window expression, then - auto row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; - auto left_bindings = left->children[0]->GetColumnBindings(); - for (idx_t i = 0; i < left_bindings.size(); i++) { - row_num->partitions.push_back(make_uniq(op->types[i], left_bindings[i])); + auto left_window_table_index = binder.GenerateTableIndex(); + auto left_window = make_uniq(left_window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto left_bindings = left->GetColumnBindings(); + for (idx_t i = 0; i < left_types.size(); i++) { + row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); } - left->expressions.push_back(std::move(row_num)); - left->types.push_back(LogicalType::BIGINT); - - row_num = make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_num->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_num->end = WindowBoundary::UNBOUNDED_FOLLOWING; - auto right_bindings = right->children[0]->GetColumnBindings(); + left_window->expressions.push_back(std::move(row_number)); + left_window->AddChild(std::move(left)); + left = std::move(left_window); + + auto right_window_table_index = binder.GenerateTableIndex(); + auto right_window = make_uniq(right_window_table_index); + row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto right_bindings = right->GetColumnBindings(); for (idx_t i = 0; i < right_bindings.size(); i++) { - row_num->partitions.push_back(make_uniq(op->types[i], right_bindings[i])); + row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); } - right->expressions.push_back(std::move(row_num)); - right->types.push_back(LogicalType::BIGINT); + right_window->expressions.push_back(std::move(row_number)); + right_window->AddChild(std::move(right)); + right = std::move(right_window); - // join (created below) now includes the row number result column from the left - left_bindings = left->GetColumnBindings(); set_op.types.push_back(LogicalType::BIGINT); -// set_op.expressions.push_back(make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1])); set_op.column_count += 1; } auto left_bindings = left->GetColumnBindings(); auto right_bindings = right->GetColumnBindings(); D_ASSERT(left_bindings.size() == right_bindings.size()); - D_ASSERT(left->types.size() == left_bindings.size()); - D_ASSERT(right->types.size() == right_bindings.size()); + if (setop_all) { + D_ASSERT(left_types.size() + 1 == left_bindings.size()); + D_ASSERT(right_types.size() + 1 == right_bindings.size()); + } vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left_bindings.size(); i++) { + for (idx_t i = 0; i < left_types.size(); i++) { JoinCondition cond; - cond.left = make_uniq(left->types[i], left_bindings[i]); - cond.right = make_uniq(right->types[i], right_bindings[i]); + cond.left = make_uniq(left_types[i], left_bindings[i]); + cond.right = make_uniq(right_types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } - JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + if (setop_all) { + JoinCondition cond; + cond.left = + make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); + cond.right = + make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; auto join_op = make_uniq(join_type); join_op->children.push_back(std::move(left)); @@ -110,6 +127,7 @@ void OperationConverter::Optimize(unique_ptr &op, bool is_root) auto projection = make_uniq(projection_table_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); + projection->ResolveOperatorTypes(); op = std::move(projection); } diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index ba490270491c..67be915e6f92 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -5,115 +5,115 @@ statement ok PRAGMA enable_verification -#query I -#SELECT 1 UNION ALL SELECT 2 -#---- -#1 -#2 -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 -#---- -#1 a -#2 b -#3 c -#4 d +query I +SELECT 1 UNION ALL SELECT 2 +---- +1 +2 + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 +---- +1 a +2 b +3 c + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 +---- +1 a +2 b +3 c +4 d # UNION/EXCEPT/INTERSECT with NULL values -#query I -#SELECT NULL UNION SELECT NULL -#---- -#NULL -# -#query I -#SELECT NULL EXCEPT SELECT NULL -#---- -# -#query I -#SELECT NULL INTERSECT SELECT NULL -#---- -#NULL +query I +SELECT NULL UNION SELECT NULL +---- +NULL + +query I +SELECT NULL EXCEPT SELECT NULL +---- + +query I +SELECT NULL INTERSECT SELECT NULL +---- +NULL # create tables -#statement ok -#CREATE TABLE test (a INTEGER, b INTEGER); -# -#statement ok -#INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) +statement ok +CREATE TABLE test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) # UNION ALL, no unique results -#query I -#SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 -#---- -#11 -#12 -#13 -# -#query I -#SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 -#---- -#1 -#1 -#1 -#2 -# -## mixing types, should upcast -#query T -#SELECT 1 UNION ALL SELECT 'asdf' -#---- -#1 -#asdf -# -#query T -#SELECT NULL UNION ALL SELECT 'asdf' -#---- -#NULL -#asdf -# -## only UNION, distinct results -#query I -#SELECT 1 UNION SELECT 1 -#---- -#1 -# -#query IT -#SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query I -#SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 -#---- -#1 -#2 -# -## mixed fun -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC -#---- -#2 b -#1 a +query I +SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 +---- +11 +12 +13 + +query I +SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +---- +1 +1 +1 +2 + +# mixing types, should upcast +query T +SELECT 1 UNION ALL SELECT 'asdf' +---- +1 +asdf + +query T +SELECT NULL UNION ALL SELECT 'asdf' +---- +NULL +asdf + +# only UNION, distinct results +query I +SELECT 1 UNION SELECT 1 +---- +1 + +query IT +SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +---- +1 a +2 b +3 c + +query I +SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +---- +1 +2 + +# mixed fun +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +---- +2 b +1 a statement ok create table t1 as (select * from (values(1),(2),(2),(3),(3),(3),(4),(4),(4),(4)) s(x)); @@ -124,15 +124,12 @@ create table t2 as (select * from (values(1),(3),(3)) t(x)); statement ok create table t3 as (select * from (values(2),(2),(2),(4),(3),(3)) u(x)); - statement ok select * from (select * from t1 except all (select * from t2)); -mode skip - # EXCEPT ALL / INTERSECT ALL query II -select * from +select *, count(*) from (select * from ((select x from t1 except all (select x from t2)) intersect all From cbb7236d789f1bf55af3bbcd4c1df77dadea6172 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 11:50:37 -0800 Subject: [PATCH 27/32] honestly having much more trouble moving this to the planner than I thought I would --- src/main/config.cpp | 2 +- src/optimizer/optimizer.cpp | 204 ++++++++--------- src/planner/binder/query_node/plan_setop.cpp | 134 +++++++++++ test/sql/setops/test_setops.test | 222 +++++++++---------- 4 files changed, 348 insertions(+), 214 deletions(-) diff --git a/src/main/config.cpp b/src/main/config.cpp index 66fd433837e0..70ecfba36cc4 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -15,7 +15,7 @@ namespace duckdb { #ifdef DEBUG -bool DBConfigOptions::debug_print_bindings = false; +bool DBConfigOptions::debug_print_bindings = true; #endif #define DUCKDB_GLOBAL(_PARAM) \ diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index e8dcbde7925d..5eefb9978756 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -93,38 +93,38 @@ unique_ptr Optimizer::Optimize(unique_ptr plan RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); // perform filter pullup - RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { - FilterPullup filter_pullup; - plan = filter_pullup.Rewrite(std::move(plan)); - }); - - // perform filter pushdown - RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { - FilterPushdown filter_pushdown(*this); - plan = filter_pushdown.Rewrite(std::move(plan)); - }); - - RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { - RegexRangeFilter regex_opt; - plan = regex_opt.Rewrite(std::move(plan)); - }); - - RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { - InClauseRewriter ic_rewriter(context, *this); - plan = ic_rewriter.Rewrite(std::move(plan)); - }); - - // removes any redundant DelimGets/DelimJoins - RunOptimizer(OptimizerType::DELIMINATOR, [&]() { - Deliminator deliminator; - plan = deliminator.Optimize(std::move(plan)); - }); +// RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { +// FilterPullup filter_pullup; +// plan = filter_pullup.Rewrite(std::move(plan)); +// }); +// +// // perform filter pushdown +// RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { +// FilterPushdown filter_pushdown(*this); +// plan = filter_pushdown.Rewrite(std::move(plan)); +// }); +// +// RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { +// RegexRangeFilter regex_opt; +// plan = regex_opt.Rewrite(std::move(plan)); +// }); +// +// RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { +// InClauseRewriter ic_rewriter(context, *this); +// plan = ic_rewriter.Rewrite(std::move(plan)); +// }); +// +// // removes any redundant DelimGets/DelimJoins +// RunOptimizer(OptimizerType::DELIMINATOR, [&]() { +// Deliminator deliminator; +// plan = deliminator.Optimize(std::move(plan)); +// }); // Convert setop operations to joins if possible - RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan, binder); - converter.Optimize(plan, true); - }); +// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { +// OperationConverter converter(*plan, binder); +// converter.Optimize(plan, true); +// }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns @@ -134,78 +134,78 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // rewrites UNNESTs in DelimJoins by moving them to the projection - RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { - UnnestRewriter unnest_rewriter; - plan = unnest_rewriter.Optimize(std::move(plan)); - }); - - // removes unused columns - RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { - RemoveUnusedColumns unused(binder, context, true); - unused.VisitOperator(*plan); - }); - - // Remove duplicate groups from aggregates - RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { - RemoveDuplicateGroups remove; - remove.VisitOperator(*plan); - }); - - // then we extract common subexpressions inside the different operators - RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { - CommonSubExpressionOptimizer cse_optimizer(binder); - cse_optimizer.VisitOperator(*plan); - }); - - // creates projection maps so unused columns are projected out early - RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { - ColumnLifetimeAnalyzer column_lifetime(true); - column_lifetime.VisitOperator(*plan); - }); - - // perform statistics propagation - column_binding_map_t> statistics_map; - RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { - StatisticsPropagator propagator(*this); - propagator.PropagateStatistics(plan); - statistics_map = propagator.GetStatisticsMap(); - }); - - // remove duplicate aggregates - RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { - CommonAggregateOptimizer common_aggregate; - common_aggregate.VisitOperator(*plan); - }); - - // creates projection maps so unused columns are projected out early - RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { - ColumnLifetimeAnalyzer column_lifetime(true); - column_lifetime.VisitOperator(*plan); - }); - - // compress data based on statistics for materializing operators - RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { - CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); - compressed_materialization.Compress(plan); - }); - - // transform ORDER BY + LIMIT to TopN - RunOptimizer(OptimizerType::TOP_N, [&]() { - TopN topn; - plan = topn.Optimize(std::move(plan)); - }); - - // apply simple expression heuristics to get an initial reordering - RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { - ExpressionHeuristics expression_heuristics(*this); - plan = expression_heuristics.Rewrite(std::move(plan)); - }); - - for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { - RunOptimizer(OptimizerType::EXTENSION, [&]() { - optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); - }); - } +// RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { +// UnnestRewriter unnest_rewriter; +// plan = unnest_rewriter.Optimize(std::move(plan)); +// }); +// +// // removes unused columns +// RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { +// RemoveUnusedColumns unused(binder, context, true); +// unused.VisitOperator(*plan); +// }); +// +// // Remove duplicate groups from aggregates +// RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { +// RemoveDuplicateGroups remove; +// remove.VisitOperator(*plan); +// }); +// +// // then we extract common subexpressions inside the different operators +// RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { +// CommonSubExpressionOptimizer cse_optimizer(binder); +// cse_optimizer.VisitOperator(*plan); +// }); +// +// // creates projection maps so unused columns are projected out early +// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { +// ColumnLifetimeAnalyzer column_lifetime(true); +// column_lifetime.VisitOperator(*plan); +// }); +// +// // perform statistics propagation +// column_binding_map_t> statistics_map; +// RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { +// StatisticsPropagator propagator(*this); +// propagator.PropagateStatistics(plan); +// statistics_map = propagator.GetStatisticsMap(); +// }); +// +// // remove duplicate aggregates +// RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { +// CommonAggregateOptimizer common_aggregate; +// common_aggregate.VisitOperator(*plan); +// }); +// +// // creates projection maps so unused columns are projected out early +// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { +// ColumnLifetimeAnalyzer column_lifetime(true); +// column_lifetime.VisitOperator(*plan); +// }); +// +// // compress data based on statistics for materializing operators +// RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { +// CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); +// compressed_materialization.Compress(plan); +// }); +// +// // transform ORDER BY + LIMIT to TopN +// RunOptimizer(OptimizerType::TOP_N, [&]() { +// TopN topn; +// plan = topn.Optimize(std::move(plan)); +// }); +// +// // apply simple expression heuristics to get an initial reordering +// RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { +// ExpressionHeuristics expression_heuristics(*this); +// plan = expression_heuristics.Rewrite(std::move(plan)); +// }); +// +// for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { +// RunOptimizer(OptimizerType::EXTENSION, [&]() { +// optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); +// }); +// } Planner::VerifyPlan(context, plan); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index c313ae016981..74e1ae1b69eb 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -2,6 +2,9 @@ #include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/operator/logical_window.hpp" +#include "duckdb/planner/expression/bound_reference_expression.hpp" +#include "duckdb/planner/expression/bound_window_expression.hpp" #include "duckdb/planner/operator/logical_set_operation.hpp" #include "duckdb/planner/query_node/bound_set_operation_node.hpp" @@ -116,9 +119,140 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { break; } + // here we convert the set operation to anti semi if required. Using the node.setop all we know what conversion we + // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); + unique_ptr op; + + // if we have an intersect or except, immediately translate it to a semi or anti join. + if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) { + auto &left = root->children[0]; + auto &right = root->children[1]; + auto left_types = root->children[0]->types; + auto right_types = root->children[1]->types; + auto old_bindings = root->GetColumnBindings(); + auto table_index = root->table_index; + if (node.setop_all) { + + // instead create a logical projection on top of whatever to add the window expression, then + auto left_window_table_index = GenerateTableIndex(); + auto left_window = make_uniq(left_window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto left_bindings = left->GetColumnBindings(); + for (idx_t i = 0; i < left_types.size(); i++) { + auto left_type = LogicalType(LogicalType::UNKNOWN); + if (i < left_types.size()) { + left_type = left_types[i]; + } + row_number->partitions.push_back(make_uniq(left_type, left_bindings[i])); + } + left_window->expressions.push_back(std::move(row_number)); + left_window->AddChild(std::move(left)); + left = std::move(left_window); + + auto right_window_table_index = GenerateTableIndex(); + auto right_window = make_uniq(right_window_table_index); + row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto right_bindings = right->GetColumnBindings(); + for (idx_t i = 0; i < right_bindings.size(); i++) { + auto right_type = LogicalType(LogicalType::UNKNOWN); + if (i < right_types.size()) { + right_type = right_types[i]; + } + row_number->partitions.push_back(make_uniq(right_type, right_bindings[i])); + } + right_window->expressions.push_back(std::move(row_number)); + right_window->AddChild(std::move(right)); + right = std::move(right_window); + + root->types.push_back(LogicalType::BIGINT); + root->column_count += 1; + } + + auto left_bindings = left->GetColumnBindings(); + auto right_bindings = right->GetColumnBindings(); + D_ASSERT(left_bindings.size() == right_bindings.size()); + + vector conditions; + // create equality condition for all columns + for (idx_t i = 0; i < left_bindings.size() - 1; i++) { + auto cond_type_left = LogicalType(LogicalType::UNKNOWN); + auto cond_type_right = LogicalType(LogicalType::UNKNOWN); + if (i < left_types.size()) { + cond_type_left = left_types[i]; + cond_type_right = right_types[i]; + } + JoinCondition cond; + cond.left = make_uniq(cond_type_left, left_bindings[i]); + cond.right = make_uniq(cond_type_right, right_bindings[i]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + + if (node.setop_all) { + JoinCondition cond; + cond.left = + make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); + cond.right = + make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); + cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; + conditions.push_back(std::move(cond)); + } + + JoinType join_type = root->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; + + auto join_op = make_uniq(join_type); + join_op->children.push_back(std::move(left)); + join_op->children.push_back(std::move(right)); + join_op->conditions = std::move(conditions); + join_op->ResolveOperatorTypes(); + + op = std::move(join_op); + + // create projection to remove row_id. + if (node.setop_all) { + vector> projection_select_list; + auto bindings = op->GetColumnBindings(); + for (idx_t i = 0; i < bindings.size() - 1; i++) { + projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); + } + auto projection_table_index = GenerateTableIndex(); + auto projection = + make_uniq(projection_table_index, std::move(projection_select_list)); + projection->children.push_back(std::move(op)); +// projection->ResolveOperatorTypes(); + op = std::move(projection); + } + + if (!node.setop_all) { + // push a distinct operator on the join + auto &types = op->types; + auto join_bindings = op->GetColumnBindings(); + vector> distinct_targets; + vector> select_list; + for (idx_t i = 0; i < join_bindings.size(); i++) { + distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); + select_list.push_back(make_uniq(types[i], join_bindings[i])); + } + auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); + distinct->children.push_back(std::move(op)); + op = std::move(distinct); + + auto projection = make_uniq(table_index, std::move(select_list)); + projection->children.push_back(std::move(op)); + op = std::move(projection); + op->ResolveOperatorTypes(); + } + return VisitQueryNode(node, std::move(op)); + } return VisitQueryNode(node, std::move(root)); } diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 67be915e6f92..79bef33b31f4 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -2,118 +2,118 @@ # description: Test UNION/EXCEPT/INTERSECT # group: [setops] -statement ok -PRAGMA enable_verification - -query I -SELECT 1 UNION ALL SELECT 2 ----- -1 -2 - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' ----- -1 a -2 b - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 ----- -1 a -2 b -3 c - -query IT -SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 ----- -1 a -2 b -3 c -4 d - -# UNION/EXCEPT/INTERSECT with NULL values -query I -SELECT NULL UNION SELECT NULL ----- -NULL - -query I -SELECT NULL EXCEPT SELECT NULL ----- - -query I -SELECT NULL INTERSECT SELECT NULL ----- -NULL - +#statement ok +#PRAGMA enable_verification + +#query I +#SELECT 1 UNION ALL SELECT 2 +#---- +#1 +#2 +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' +#---- +#1 a +#2 b +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 +#---- +#1 a +#2 b +#3 c +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 +#---- +#1 a +#2 b +#3 c +#4 d +# +## UNION/EXCEPT/INTERSECT with NULL values +#query I +#SELECT NULL UNION SELECT NULL +#---- +#NULL + +#query I +#SELECT NULL EXCEPT SELECT NULL +#---- + +#query I +#SELECT NULL INTERSECT SELECT NULL +#---- +#NULL +# # create tables -statement ok -CREATE TABLE test (a INTEGER, b INTEGER); - -statement ok -INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) - -# UNION ALL, no unique results -query I -SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 ----- -11 -12 -13 - -query I -SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 ----- -1 -1 -1 -2 - -# mixing types, should upcast -query T -SELECT 1 UNION ALL SELECT 'asdf' ----- -1 -asdf - -query T -SELECT NULL UNION ALL SELECT 'asdf' ----- -NULL -asdf - -# only UNION, distinct results -query I -SELECT 1 UNION SELECT 1 ----- -1 - -query IT -SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 ----- -1 a -2 b -3 c - -query I -SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 ----- -1 -2 - -# mixed fun -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 ----- -1 a -2 b - -query IT -SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC ----- -2 b -1 a +#statement ok +#CREATE TABLE test (a INTEGER, b INTEGER); +# +#statement ok +#INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) +# +## UNION ALL, no unique results +#query I +#SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 +#---- +#11 +#12 +#13 +# +#query I +#SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +#---- +#1 +#1 +#1 +#2 +# +## mixing types, should upcast +#query T +#SELECT 1 UNION ALL SELECT 'asdf' +#---- +#1 +#asdf +# +#query T +#SELECT NULL UNION ALL SELECT 'asdf' +#---- +#NULL +#asdf +# +## only UNION, distinct results +#query I +#SELECT 1 UNION SELECT 1 +#---- +#1 +# +#query IT +#SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +#---- +#1 a +#2 b +#3 c +# +#query I +#SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +#---- +#1 +#2 +# +## mixed fun +#query IT +#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +#---- +#1 a +#2 b +# +#query IT +#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +#---- +#2 b +#1 a statement ok create table t1 as (select * from (values(1),(2),(2),(3),(3),(3),(4),(4),(4),(4)) s(x)); From 9705af80fa897c0b6fe2fe66ec61b4d8706a484f Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 12:41:46 -0800 Subject: [PATCH 28/32] very select_list is messing up the results, dont know why --- src/planner/binder/query_node/plan_setop.cpp | 23 +++++--------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 74e1ae1b69eb..85f36d981cae 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -123,6 +123,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); + // Is this necessary? + root->ResolveOperatorTypes(); unique_ptr op; @@ -145,11 +147,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { row_number->end = WindowBoundary::CURRENT_ROW_ROWS; auto left_bindings = left->GetColumnBindings(); for (idx_t i = 0; i < left_types.size(); i++) { - auto left_type = LogicalType(LogicalType::UNKNOWN); - if (i < left_types.size()) { - left_type = left_types[i]; - } - row_number->partitions.push_back(make_uniq(left_type, left_bindings[i])); + row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); } left_window->expressions.push_back(std::move(row_number)); left_window->AddChild(std::move(left)); @@ -163,11 +161,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { row_number->end = WindowBoundary::CURRENT_ROW_ROWS; auto right_bindings = right->GetColumnBindings(); for (idx_t i = 0; i < right_bindings.size(); i++) { - auto right_type = LogicalType(LogicalType::UNKNOWN); - if (i < right_types.size()) { - right_type = right_types[i]; - } - row_number->partitions.push_back(make_uniq(right_type, right_bindings[i])); + row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); } right_window->expressions.push_back(std::move(row_number)); right_window->AddChild(std::move(right)); @@ -186,13 +180,9 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { for (idx_t i = 0; i < left_bindings.size() - 1; i++) { auto cond_type_left = LogicalType(LogicalType::UNKNOWN); auto cond_type_right = LogicalType(LogicalType::UNKNOWN); - if (i < left_types.size()) { - cond_type_left = left_types[i]; - cond_type_right = right_types[i]; - } JoinCondition cond; - cond.left = make_uniq(cond_type_left, left_bindings[i]); - cond.right = make_uniq(cond_type_right, right_bindings[i]); + cond.left = make_uniq(left_types[i], left_bindings[i]); + cond.right = make_uniq(right_types[i], right_bindings[i]); cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; conditions.push_back(std::move(cond)); } @@ -228,7 +218,6 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { auto projection = make_uniq(projection_table_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); -// projection->ResolveOperatorTypes(); op = std::move(projection); } From d2829aba81fa1c6f77a45201505320306e921ffb Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 12:56:04 -0800 Subject: [PATCH 29/32] planning and binding is hard to understand --- src/planner/binder/query_node/bind_select_node.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/planner/binder/query_node/bind_select_node.cpp b/src/planner/binder/query_node/bind_select_node.cpp index a4906402b6d8..644294ce21da 100644 --- a/src/planner/binder/query_node/bind_select_node.cpp +++ b/src/planner/binder/query_node/bind_select_node.cpp @@ -514,6 +514,7 @@ unique_ptr Binder::BindSelectNode(SelectNode &statement, unique_ group_by_all_indexes.push_back(i); } + // inspect what is going on here. result->select_list.push_back(std::move(expr)); if (is_original_column) { new_names.push_back(std::move(result->names[i])); From 9241df8a687aa4b1506a2815f4f7194440e7722a Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 14:45:20 -0800 Subject: [PATCH 30/32] think I have fixed more, but still getting some erros regarding flattening correlated subqueries --- src/optimizer/optimizer.cpp | 208 ++++++++--------- src/planner/binder/query_node/plan_setop.cpp | 59 +++-- test/sql/setops/test_setops.test | 222 +++++++++---------- 3 files changed, 241 insertions(+), 248 deletions(-) diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 5eefb9978756..dc084d8871cc 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -93,38 +93,38 @@ unique_ptr Optimizer::Optimize(unique_ptr plan RunOptimizer(OptimizerType::EXPRESSION_REWRITER, [&]() { rewriter.VisitOperator(*plan); }); // perform filter pullup -// RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { -// FilterPullup filter_pullup; -// plan = filter_pullup.Rewrite(std::move(plan)); -// }); -// -// // perform filter pushdown -// RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { -// FilterPushdown filter_pushdown(*this); -// plan = filter_pushdown.Rewrite(std::move(plan)); -// }); -// -// RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { -// RegexRangeFilter regex_opt; -// plan = regex_opt.Rewrite(std::move(plan)); -// }); -// -// RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { -// InClauseRewriter ic_rewriter(context, *this); -// plan = ic_rewriter.Rewrite(std::move(plan)); -// }); -// -// // removes any redundant DelimGets/DelimJoins -// RunOptimizer(OptimizerType::DELIMINATOR, [&]() { -// Deliminator deliminator; -// plan = deliminator.Optimize(std::move(plan)); -// }); - - // Convert setop operations to joins if possible -// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { -// OperationConverter converter(*plan, binder); -// converter.Optimize(plan, true); -// }); + RunOptimizer(OptimizerType::FILTER_PULLUP, [&]() { + FilterPullup filter_pullup; + plan = filter_pullup.Rewrite(std::move(plan)); + }); + + // perform filter pushdown + RunOptimizer(OptimizerType::FILTER_PUSHDOWN, [&]() { + FilterPushdown filter_pushdown(*this); + plan = filter_pushdown.Rewrite(std::move(plan)); + }); + + RunOptimizer(OptimizerType::REGEX_RANGE, [&]() { + RegexRangeFilter regex_opt; + plan = regex_opt.Rewrite(std::move(plan)); + }); + + RunOptimizer(OptimizerType::IN_CLAUSE, [&]() { + InClauseRewriter ic_rewriter(context, *this); + plan = ic_rewriter.Rewrite(std::move(plan)); + }); + + // removes any redundant DelimGets/DelimJoins + RunOptimizer(OptimizerType::DELIMINATOR, [&]() { + Deliminator deliminator; + plan = deliminator.Optimize(std::move(plan)); + }); + +// Convert setop operations to joins if possible + RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { + OperationConverter converter(*plan, binder); + converter.Optimize(plan, true); + }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns @@ -134,78 +134,78 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // rewrites UNNESTs in DelimJoins by moving them to the projection -// RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { -// UnnestRewriter unnest_rewriter; -// plan = unnest_rewriter.Optimize(std::move(plan)); -// }); -// -// // removes unused columns -// RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { -// RemoveUnusedColumns unused(binder, context, true); -// unused.VisitOperator(*plan); -// }); -// -// // Remove duplicate groups from aggregates -// RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { -// RemoveDuplicateGroups remove; -// remove.VisitOperator(*plan); -// }); -// -// // then we extract common subexpressions inside the different operators -// RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { -// CommonSubExpressionOptimizer cse_optimizer(binder); -// cse_optimizer.VisitOperator(*plan); -// }); -// -// // creates projection maps so unused columns are projected out early -// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { -// ColumnLifetimeAnalyzer column_lifetime(true); -// column_lifetime.VisitOperator(*plan); -// }); -// -// // perform statistics propagation -// column_binding_map_t> statistics_map; -// RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { -// StatisticsPropagator propagator(*this); -// propagator.PropagateStatistics(plan); -// statistics_map = propagator.GetStatisticsMap(); -// }); -// -// // remove duplicate aggregates -// RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { -// CommonAggregateOptimizer common_aggregate; -// common_aggregate.VisitOperator(*plan); -// }); -// -// // creates projection maps so unused columns are projected out early -// RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { -// ColumnLifetimeAnalyzer column_lifetime(true); -// column_lifetime.VisitOperator(*plan); -// }); -// -// // compress data based on statistics for materializing operators -// RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { -// CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); -// compressed_materialization.Compress(plan); -// }); -// -// // transform ORDER BY + LIMIT to TopN -// RunOptimizer(OptimizerType::TOP_N, [&]() { -// TopN topn; -// plan = topn.Optimize(std::move(plan)); -// }); -// -// // apply simple expression heuristics to get an initial reordering -// RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { -// ExpressionHeuristics expression_heuristics(*this); -// plan = expression_heuristics.Rewrite(std::move(plan)); -// }); -// -// for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { -// RunOptimizer(OptimizerType::EXTENSION, [&]() { -// optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); -// }); -// } + RunOptimizer(OptimizerType::UNNEST_REWRITER, [&]() { + UnnestRewriter unnest_rewriter; + plan = unnest_rewriter.Optimize(std::move(plan)); + }); + + // removes unused columns + RunOptimizer(OptimizerType::UNUSED_COLUMNS, [&]() { + RemoveUnusedColumns unused(binder, context, true); + unused.VisitOperator(*plan); + }); + + // Remove duplicate groups from aggregates + RunOptimizer(OptimizerType::DUPLICATE_GROUPS, [&]() { + RemoveDuplicateGroups remove; + remove.VisitOperator(*plan); + }); + + // then we extract common subexpressions inside the different operators + RunOptimizer(OptimizerType::COMMON_SUBEXPRESSIONS, [&]() { + CommonSubExpressionOptimizer cse_optimizer(binder); + cse_optimizer.VisitOperator(*plan); + }); + + // creates projection maps so unused columns are projected out early + RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { + ColumnLifetimeAnalyzer column_lifetime(true); + column_lifetime.VisitOperator(*plan); + }); + + // perform statistics propagation + column_binding_map_t> statistics_map; + RunOptimizer(OptimizerType::STATISTICS_PROPAGATION, [&]() { + StatisticsPropagator propagator(*this); + propagator.PropagateStatistics(plan); + statistics_map = propagator.GetStatisticsMap(); + }); + + // remove duplicate aggregates + RunOptimizer(OptimizerType::COMMON_AGGREGATE, [&]() { + CommonAggregateOptimizer common_aggregate; + common_aggregate.VisitOperator(*plan); + }); + + // creates projection maps so unused columns are projected out early + RunOptimizer(OptimizerType::COLUMN_LIFETIME, [&]() { + ColumnLifetimeAnalyzer column_lifetime(true); + column_lifetime.VisitOperator(*plan); + }); + + // compress data based on statistics for materializing operators + RunOptimizer(OptimizerType::COMPRESSED_MATERIALIZATION, [&]() { + CompressedMaterialization compressed_materialization(context, binder, std::move(statistics_map)); + compressed_materialization.Compress(plan); + }); + + // transform ORDER BY + LIMIT to TopN + RunOptimizer(OptimizerType::TOP_N, [&]() { + TopN topn; + plan = topn.Optimize(std::move(plan)); + }); + + // apply simple expression heuristics to get an initial reordering + RunOptimizer(OptimizerType::REORDER_FILTER, [&]() { + ExpressionHeuristics expression_heuristics(*this); + plan = expression_heuristics.Rewrite(std::move(plan)); + }); + + for (auto &optimizer_extension : DBConfig::GetConfig(context).optimizer_extensions) { + RunOptimizer(OptimizerType::EXTENSION, [&]() { + optimizer_extension.optimize_function(context, optimizer_extension.optimizer_info.get(), plan); + }); + } Planner::VerifyPlan(context, plan); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 85f36d981cae..44e1d0408365 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -10,6 +10,23 @@ namespace duckdb { +static unique_ptr CreateWindowWithPartitionedRowNum(idx_t window_table_index, unique_ptr op) { + // instead create a logical projection on top of whatever to add the window expression, then + auto window = make_uniq(window_table_index); + auto row_number = + make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); + row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; + row_number->end = WindowBoundary::CURRENT_ROW_ROWS; + auto bindings = op->GetColumnBindings(); + auto types = op->types; + for (idx_t i = 0; i < types.size(); i++) { + row_number->partitions.push_back(make_uniq(types[i], bindings[i])); + } + window->expressions.push_back(std::move(row_number)); + window->AddChild(std::move(op)); + return window; +} + // Optionally push a PROJECTION operator unique_ptr Binder::CastLogicalOperatorToTypes(vector &source_types, vector &target_types, @@ -135,37 +152,12 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { auto left_types = root->children[0]->types; auto right_types = root->children[1]->types; auto old_bindings = root->GetColumnBindings(); - auto table_index = root->table_index; if (node.setop_all) { + auto window_left_table_id = GenerateTableIndex(); + root->children[0] = CreateWindowWithPartitionedRowNum(window_left_table_id, std::move(root->children[0])); - // instead create a logical projection on top of whatever to add the window expression, then - auto left_window_table_index = GenerateTableIndex(); - auto left_window = make_uniq(left_window_table_index); - auto row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto left_bindings = left->GetColumnBindings(); - for (idx_t i = 0; i < left_types.size(); i++) { - row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); - } - left_window->expressions.push_back(std::move(row_number)); - left_window->AddChild(std::move(left)); - left = std::move(left_window); - - auto right_window_table_index = GenerateTableIndex(); - auto right_window = make_uniq(right_window_table_index); - row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto right_bindings = right->GetColumnBindings(); - for (idx_t i = 0; i < right_bindings.size(); i++) { - row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); - } - right_window->expressions.push_back(std::move(row_number)); - right_window->AddChild(std::move(right)); - right = std::move(right_window); + auto window_right_table_id = GenerateTableIndex(); + root->children[1] = CreateWindowWithPartitionedRowNum(window_right_table_id, std::move(root->children[1])); root->types.push_back(LogicalType::BIGINT); root->column_count += 1; @@ -177,7 +169,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { vector conditions; // create equality condition for all columns - for (idx_t i = 0; i < left_bindings.size() - 1; i++) { + idx_t binding_offset = node.setop_all ? 1 : 0; + for (idx_t i = 0; i < left_bindings.size() - binding_offset; i++) { auto cond_type_left = LogicalType(LogicalType::UNKNOWN); auto cond_type_right = LogicalType(LogicalType::UNKNOWN); JoinCondition cond; @@ -187,6 +180,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { conditions.push_back(std::move(cond)); } + // create condition for the row number as well. if (node.setop_all) { JoinCondition cond; cond.left = @@ -214,9 +208,8 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { for (idx_t i = 0; i < bindings.size() - 1; i++) { projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); } - auto projection_table_index = GenerateTableIndex(); auto projection = - make_uniq(projection_table_index, std::move(projection_select_list)); + make_uniq(node.setop_index, std::move(projection_select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); } @@ -235,7 +228,7 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { distinct->children.push_back(std::move(op)); op = std::move(distinct); - auto projection = make_uniq(table_index, std::move(select_list)); + auto projection = make_uniq(node.setop_index, std::move(select_list)); projection->children.push_back(std::move(op)); op = std::move(projection); op->ResolveOperatorTypes(); diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 79bef33b31f4..67be915e6f92 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -2,118 +2,118 @@ # description: Test UNION/EXCEPT/INTERSECT # group: [setops] -#statement ok -#PRAGMA enable_verification - -#query I -#SELECT 1 UNION ALL SELECT 2 -#---- -#1 -#2 -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 -#---- -#1 a -#2 b -#3 c -#4 d -# -## UNION/EXCEPT/INTERSECT with NULL values -#query I -#SELECT NULL UNION SELECT NULL -#---- -#NULL - -#query I -#SELECT NULL EXCEPT SELECT NULL -#---- - -#query I -#SELECT NULL INTERSECT SELECT NULL -#---- -#NULL -# +statement ok +PRAGMA enable_verification + +query I +SELECT 1 UNION ALL SELECT 2 +---- +1 +2 + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' order by 1 +---- +1 a +2 b +3 c + +query IT +SELECT 1, 'a' UNION ALL SELECT 2, 'b' UNION ALL SELECT 3, 'c' UNION ALL SELECT 4, 'd' order by 1 +---- +1 a +2 b +3 c +4 d + +# UNION/EXCEPT/INTERSECT with NULL values +query I +SELECT NULL UNION SELECT NULL +---- +NULL + +query I +SELECT NULL EXCEPT SELECT NULL +---- + +query I +SELECT NULL INTERSECT SELECT NULL +---- +NULL + # create tables -#statement ok -#CREATE TABLE test (a INTEGER, b INTEGER); -# -#statement ok -#INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) -# -## UNION ALL, no unique results -#query I -#SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 -#---- -#11 -#12 -#13 -# -#query I -#SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 -#---- -#1 -#1 -#1 -#2 -# -## mixing types, should upcast -#query T -#SELECT 1 UNION ALL SELECT 'asdf' -#---- -#1 -#asdf -# -#query T -#SELECT NULL UNION ALL SELECT 'asdf' -#---- -#NULL -#asdf -# -## only UNION, distinct results -#query I -#SELECT 1 UNION SELECT 1 -#---- -#1 -# -#query IT -#SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 -#---- -#1 a -#2 b -#3 c -# -#query I -#SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 -#---- -#1 -#2 -# -## mixed fun -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 -#---- -#1 a -#2 b -# -#query IT -#SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC -#---- -#2 b -#1 a +statement ok +CREATE TABLE test (a INTEGER, b INTEGER); + +statement ok +INSERT INTO test VALUES (11, 1), (12, 1), (13, 2) + +# UNION ALL, no unique results +query I +SELECT a FROM test WHERE a < 13 UNION ALL SELECT a FROM test WHERE a = 13 +---- +11 +12 +13 + +query I +SELECT b FROM test WHERE a < 13 UNION ALL SELECT b FROM test WHERE a > 11 +---- +1 +1 +1 +2 + +# mixing types, should upcast +query T +SELECT 1 UNION ALL SELECT 'asdf' +---- +1 +asdf + +query T +SELECT NULL UNION ALL SELECT 'asdf' +---- +NULL +asdf + +# only UNION, distinct results +query I +SELECT 1 UNION SELECT 1 +---- +1 + +query IT +SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 3, 'c' UNION SELECT 1, 'a' order by 1 +---- +1 a +2 b +3 c + +query I +SELECT b FROM test WHERE a < 13 UNION SELECT b FROM test WHERE a > 11 ORDER BY 1 +---- +1 +2 + +# mixed fun +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 +---- +1 a +2 b + +query IT +SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' ORDER BY 1 DESC +---- +2 b +1 a statement ok create table t1 as (select * from (values(1),(2),(2),(3),(3),(3),(4),(4),(4),(4)) s(x)); From dfebb8777e76a0010bc75483a4268c81d994be06 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 14:47:54 -0800 Subject: [PATCH 31/32] remove unused code --- .../physical_plan/plan_set_operation.cpp | 64 +------------------ src/optimizer/optimizer.cpp | 8 +-- 2 files changed, 5 insertions(+), 67 deletions(-) diff --git a/src/execution/physical_plan/plan_set_operation.cpp b/src/execution/physical_plan/plan_set_operation.cpp index 4552abf154d3..5dac7235aeb3 100644 --- a/src/execution/physical_plan/plan_set_operation.cpp +++ b/src/execution/physical_plan/plan_set_operation.cpp @@ -10,19 +10,6 @@ namespace duckdb { -static vector> CreatePartitionedRowNumExpression(const vector &types) { - vector> res; - auto expr = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - expr->start = WindowBoundary::UNBOUNDED_PRECEDING; - expr->end = WindowBoundary::UNBOUNDED_FOLLOWING; - for (idx_t i = 0; i < types.size(); i++) { - expr->partitions.push_back(make_uniq(types[i], i)); - } - res.push_back(std::move(expr)); - return res; -} - static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t i) { JoinCondition cond; cond.left = make_uniq(type, i); @@ -53,56 +40,7 @@ unique_ptr PhysicalPlanGenerator::CreatePlan(LogicalSetOperati break; case LogicalOperatorType::LOGICAL_EXCEPT: case LogicalOperatorType::LOGICAL_INTERSECT: { - - - auto &types = left->GetTypes(); - vector conditions; - // create equality condition for all columns - for (idx_t i = 0; i < types.size(); i++) { - conditions.push_back(CreateNotDistinctComparison(types[i], i)); - } - // For EXCEPT ALL / INTERSECT ALL we push a window operator with a ROW_NUMBER into the scans and join to get bag - // semantics. - if (op.setop_all) { - vector window_types = types; - window_types.push_back(LogicalType::BIGINT); - - auto window_left = make_uniq(window_types, CreatePartitionedRowNumExpression(types), - left->estimated_cardinality); - window_left->children.push_back(std::move(left)); - left = std::move(window_left); - - auto window_right = make_uniq(window_types, CreatePartitionedRowNumExpression(types), - right->estimated_cardinality); - window_right->children.push_back(std::move(right)); - right = std::move(window_right); - - // add window expression result to join condition - conditions.push_back(CreateNotDistinctComparison(LogicalType::BIGINT, types.size())); - // join (created below) now includes the row number result column - op.types.push_back(LogicalType::BIGINT); - } - - // EXCEPT is ANTI join - // INTERSECT is SEMI join - PerfectHashJoinStats join_stats; // used in inner joins only - - JoinType join_type = op.type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - result = make_uniq(op, std::move(left), std::move(right), std::move(conditions), join_type, - op.estimated_cardinality, join_stats); - - // For EXCEPT ALL / INTERSECT ALL we need to remove the row number column again - if (op.setop_all) { - vector> projection_select_list; - for (idx_t i = 0; i < types.size(); i++) { - projection_select_list.push_back(make_uniq(types[i], i)); - } - auto projection = - make_uniq(types, std::move(projection_select_list), op.estimated_cardinality); - projection->children.push_back(std::move(result)); - result = std::move(projection); - } - break; + throw InternalException("Logical Except/Intersect should have been transformed to semi anti before the physical planning phase"); } default: throw InternalException("Unexpected operator type for set operation"); diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index dc084d8871cc..3aaa1a6572ad 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -121,10 +121,10 @@ unique_ptr Optimizer::Optimize(unique_ptr plan }); // Convert setop operations to joins if possible - RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { - OperationConverter converter(*plan, binder); - converter.Optimize(plan, true); - }); +// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { +// OperationConverter converter(*plan, binder); +// converter.Optimize(plan, true); +// }); // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns From 31ab2a6e234a0ccc26d427ab5377482e974864ac Mon Sep 17 00:00:00 2001 From: Tmonster Date: Tue, 19 Dec 2023 17:10:03 -0800 Subject: [PATCH 32/32] more removal of dead code --- src/common/enum_util.cpp | 5 - src/common/enums/optimizer_type.cpp | 1 - .../duckdb/common/enums/optimizer_type.hpp | 1 - .../duckdb/optimizer/operation_converter.hpp | 28 --- src/optimizer/CMakeLists.txt | 1 - src/optimizer/operation_converter.cpp | 172 ------------------ src/optimizer/optimizer.cpp | 7 - .../binder/query_node/bind_select_node.cpp | 1 - src/planner/binder/query_node/plan_setop.cpp | 2 +- test/sql/setops/test_setops.test | 20 +- 10 files changed, 4 insertions(+), 234 deletions(-) delete mode 100644 src/include/duckdb/optimizer/operation_converter.hpp delete mode 100644 src/optimizer/operation_converter.cpp diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp index 5e6cb44d0bca..806480ea68d8 100644 --- a/src/common/enum_util.cpp +++ b/src/common/enum_util.cpp @@ -3605,8 +3605,6 @@ const char* EnumUtil::ToChars(OptimizerType value) { return "JOIN_ORDER"; case OptimizerType::DELIMINATOR: return "DELIMINATOR"; - case OptimizerType::OPERATION_CONVERTER: - return "OPERATION_CONVERTER"; case OptimizerType::UNNEST_REWRITER: return "UNNEST_REWRITER"; case OptimizerType::UNUSED_COLUMNS: @@ -3660,9 +3658,6 @@ OptimizerType EnumUtil::FromString(const char *value) { if (StringUtil::Equals(value, "DELIMINATOR")) { return OptimizerType::DELIMINATOR; } - if (StringUtil::Equals(value, "OPERATION_CONVERTER")) { - return OptimizerType::OPERATION_CONVERTER; - } if (StringUtil::Equals(value, "UNNEST_REWRITER")) { return OptimizerType::UNNEST_REWRITER; } diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index 663a80456f72..a8365ab76d57 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -16,7 +16,6 @@ static DefaultOptimizerType internal_optimizer_types[] = { {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN}, {"regex_range", OptimizerType::REGEX_RANGE}, {"in_clause", OptimizerType::IN_CLAUSE}, - {"operation_converter", OptimizerType::OPERATION_CONVERTER}, {"join_order", OptimizerType::JOIN_ORDER}, {"deliminator", OptimizerType::DELIMINATOR}, {"unnest_rewriter", OptimizerType::UNNEST_REWRITER}, diff --git a/src/include/duckdb/common/enums/optimizer_type.hpp b/src/include/duckdb/common/enums/optimizer_type.hpp index cb4d3063e901..c687ec406e54 100644 --- a/src/include/duckdb/common/enums/optimizer_type.hpp +++ b/src/include/duckdb/common/enums/optimizer_type.hpp @@ -22,7 +22,6 @@ enum class OptimizerType : uint32_t { IN_CLAUSE, JOIN_ORDER, DELIMINATOR, - OPERATION_CONVERTER, UNNEST_REWRITER, UNUSED_COLUMNS, STATISTICS_PROPAGATION, diff --git a/src/include/duckdb/optimizer/operation_converter.hpp b/src/include/duckdb/optimizer/operation_converter.hpp deleted file mode 100644 index e00a7b9c35c4..000000000000 --- a/src/include/duckdb/optimizer/operation_converter.hpp +++ /dev/null @@ -1,28 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/optimizer/operation_converter.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/optimizer/column_binding_replacer.hpp" - -namespace duckdb { - -//! The Operation Converter converts Set operations to joins when possible -class OperationConverter { -public: - OperationConverter(LogicalOperator &root, Binder &binder); - - //! Perform DelimJoin elimination - void Optimize(unique_ptr &op, bool is_root = false); - LogicalOperator &root; - Binder &binder; - -private: -}; - -} // namespace duckdb diff --git a/src/optimizer/CMakeLists.txt b/src/optimizer/CMakeLists.txt index 39f3f84caccd..53f57e1c168d 100644 --- a/src/optimizer/CMakeLists.txt +++ b/src/optimizer/CMakeLists.txt @@ -14,7 +14,6 @@ add_library_unity( compressed_materialization.cpp cse_optimizer.cpp deliminator.cpp - operation_converter.cpp unnest_rewriter.cpp column_lifetime_analyzer.cpp expression_heuristics.cpp diff --git a/src/optimizer/operation_converter.cpp b/src/optimizer/operation_converter.cpp deleted file mode 100644 index e08cd4a5ae6d..000000000000 --- a/src/optimizer/operation_converter.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include "duckdb/optimizer/operation_converter.hpp" -#include "duckdb/planner/expression/bound_cast_expression.hpp" -#include "duckdb/planner/expression/bound_conjunction_expression.hpp" -#include "duckdb/planner/expression/bound_columnref_expression.hpp" -#include "duckdb/planner/operator/logical_delim_get.hpp" -#include "duckdb/common/enums/join_type.hpp" -#include "duckdb/planner/joinside.hpp" -#include "duckdb/planner/operator/logical_comparison_join.hpp" -#include "duckdb/planner/operator/logical_window.hpp" -#include "duckdb/planner/operator/logical_set_operation.hpp" -#include "duckdb/planner/expression/bound_reference_expression.hpp" -#include "duckdb/planner/expression/bound_window_expression.hpp" -#include "duckdb/planner/operator/logical_projection.hpp" -#include "duckdb/planner/operator/logical_distinct.hpp" -#include "iostream" -namespace duckdb { - -OperationConverter::OperationConverter(LogicalOperator &root, Binder &binder) : root(root), binder(binder) { - root.ResolveOperatorTypes(); -} - -void OperationConverter::Optimize(unique_ptr &op, bool is_root) { - for (auto &child : op->children) { - Optimize(child); - } - switch (op->type) { - // if it is setop all, we don't replace (even though we technically still could) - // if it is not setop all, duplicate elimination should happen - case LogicalOperatorType::LOGICAL_INTERSECT: - case LogicalOperatorType::LOGICAL_EXCEPT: { - auto &set_op = op->Cast(); - auto setop_all = set_op.setop_all; - if (is_root) { - // if the operator is the root, then break. We don't want to update the root - // and the join order optimizer can still have an effect. - break; - } - - auto table_index = set_op.table_index; - auto &left = op->children[0]; - auto left_types = op->children[0]->types; - auto &right = op->children[1]; - auto right_types = op->children[1]->types; - - auto old_bindings = op->GetColumnBindings(); - - if (setop_all) { - - // instead create a logical projection on top of whatever to add the window expression, then - auto left_window_table_index = binder.GenerateTableIndex(); - auto left_window = make_uniq(left_window_table_index); - auto row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto left_bindings = left->GetColumnBindings(); - for (idx_t i = 0; i < left_types.size(); i++) { - row_number->partitions.push_back(make_uniq(left_types[i], left_bindings[i])); - } - left_window->expressions.push_back(std::move(row_number)); - left_window->AddChild(std::move(left)); - left = std::move(left_window); - - auto right_window_table_index = binder.GenerateTableIndex(); - auto right_window = make_uniq(right_window_table_index); - row_number = - make_uniq(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr); - row_number->start = WindowBoundary::UNBOUNDED_PRECEDING; - row_number->end = WindowBoundary::CURRENT_ROW_ROWS; - auto right_bindings = right->GetColumnBindings(); - for (idx_t i = 0; i < right_bindings.size(); i++) { - row_number->partitions.push_back(make_uniq(right_types[i], right_bindings[i])); - } - right_window->expressions.push_back(std::move(row_number)); - right_window->AddChild(std::move(right)); - right = std::move(right_window); - - set_op.types.push_back(LogicalType::BIGINT); - set_op.column_count += 1; - } - auto left_bindings = left->GetColumnBindings(); - auto right_bindings = right->GetColumnBindings(); - D_ASSERT(left_bindings.size() == right_bindings.size()); - - if (setop_all) { - D_ASSERT(left_types.size() + 1 == left_bindings.size()); - D_ASSERT(right_types.size() + 1 == right_bindings.size()); - } - vector conditions; - // create equality condition for all columns - for (idx_t i = 0; i < left_types.size(); i++) { - JoinCondition cond; - cond.left = make_uniq(left_types[i], left_bindings[i]); - cond.right = make_uniq(right_types[i], right_bindings[i]); - cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; - conditions.push_back(std::move(cond)); - } - - if (setop_all) { - JoinCondition cond; - cond.left = - make_uniq(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]); - cond.right = - make_uniq(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]); - cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM; - conditions.push_back(std::move(cond)); - } - - JoinType join_type = op->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI; - - auto join_op = make_uniq(join_type); - join_op->children.push_back(std::move(left)); - join_op->children.push_back(std::move(right)); - join_op->conditions = std::move(conditions); - join_op->ResolveOperatorTypes(); - - op = std::move(join_op); - - // create projection to remove row_id. - if (setop_all) { - vector> projection_select_list; - auto bindings = op->GetColumnBindings(); - for (idx_t i = 0; i < bindings.size() - 1; i++) { - projection_select_list.push_back(make_uniq(op->types[i], bindings[i])); - } - auto projection_table_index = binder.GenerateTableIndex(); - auto projection = - make_uniq(projection_table_index, std::move(projection_select_list)); - projection->children.push_back(std::move(op)); - projection->ResolveOperatorTypes(); - op = std::move(projection); - } - - if (!setop_all) { - // push a distinct operator on the join - auto &types = op->types; - auto join_bindings = op->GetColumnBindings(); - vector> distinct_targets; - vector> select_list; - for (idx_t i = 0; i < join_bindings.size(); i++) { - distinct_targets.push_back(make_uniq(types[i], join_bindings[i])); - select_list.push_back(make_uniq(types[i], join_bindings[i])); - } - auto distinct = make_uniq(std::move(distinct_targets), DistinctType::DISTINCT); - distinct->children.push_back(std::move(op)); - op = std::move(distinct); - - auto projection = make_uniq(table_index, std::move(select_list)); - projection->children.push_back(std::move(op)); - op = std::move(projection); - op->ResolveOperatorTypes(); - } - // now perform column binding replacement - auto new_bindings = op->GetColumnBindings(); - -// D_ASSERT(old_bindings.size() == new_bindings.size()); - vector replacement_bindings; - for (idx_t i = 0; i < old_bindings.size(); i++) { - replacement_bindings.push_back({old_bindings[i], new_bindings[i]}); - } - - auto binding_replacer = ColumnBindingReplacer(); - binding_replacer.replacement_bindings = replacement_bindings; - binding_replacer.VisitOperator(root); - break; - } - default: - break; - } -} - -} // namespace duckdb diff --git a/src/optimizer/optimizer.cpp b/src/optimizer/optimizer.cpp index 3aaa1a6572ad..0a66f0007761 100644 --- a/src/optimizer/optimizer.cpp +++ b/src/optimizer/optimizer.cpp @@ -9,7 +9,6 @@ #include "duckdb/optimizer/compressed_materialization.hpp" #include "duckdb/optimizer/cse_optimizer.hpp" #include "duckdb/optimizer/deliminator.hpp" -#include "duckdb/optimizer/operation_converter.hpp" #include "duckdb/optimizer/expression_heuristics.hpp" #include "duckdb/optimizer/filter_pullup.hpp" #include "duckdb/optimizer/filter_pushdown.hpp" @@ -120,12 +119,6 @@ unique_ptr Optimizer::Optimize(unique_ptr plan plan = deliminator.Optimize(std::move(plan)); }); -// Convert setop operations to joins if possible -// RunOptimizer(OptimizerType::OPERATION_CONVERTER, [&]() { -// OperationConverter converter(*plan, binder); -// converter.Optimize(plan, true); -// }); - // then we perform the join ordering optimization // this also rewrites cross products + filters into joins and performs filter pushdowns RunOptimizer(OptimizerType::JOIN_ORDER, [&]() { diff --git a/src/planner/binder/query_node/bind_select_node.cpp b/src/planner/binder/query_node/bind_select_node.cpp index 644294ce21da..a4906402b6d8 100644 --- a/src/planner/binder/query_node/bind_select_node.cpp +++ b/src/planner/binder/query_node/bind_select_node.cpp @@ -514,7 +514,6 @@ unique_ptr Binder::BindSelectNode(SelectNode &statement, unique_ group_by_all_indexes.push_back(i); } - // inspect what is going on here. result->select_list.push_back(std::move(expr)); if (is_original_column) { new_names.push_back(std::move(result->names[i])); diff --git a/src/planner/binder/query_node/plan_setop.cpp b/src/planner/binder/query_node/plan_setop.cpp index 44e1d0408365..57e3f1adac19 100644 --- a/src/planner/binder/query_node/plan_setop.cpp +++ b/src/planner/binder/query_node/plan_setop.cpp @@ -140,12 +140,12 @@ unique_ptr Binder::CreatePlan(BoundSetOperationNode &node) { // need. auto root = make_uniq(node.setop_index, node.types.size(), std::move(left_node), std::move(right_node), logical_type, node.setop_all); - // Is this necessary? root->ResolveOperatorTypes(); unique_ptr op; // if we have an intersect or except, immediately translate it to a semi or anti join. + // Unions stay as they are. if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) { auto &left = root->children[0]; auto &right = root->children[1]; diff --git a/test/sql/setops/test_setops.test b/test/sql/setops/test_setops.test index 67be915e6f92..d0cc7f3cdb7b 100644 --- a/test/sql/setops/test_setops.test +++ b/test/sql/setops/test_setops.test @@ -115,25 +115,11 @@ SELECT 1, 'a' UNION ALL SELECT 1, 'a' UNION SELECT 2, 'b' UNION SELECT 1, 'a' OR 2 b 1 a -statement ok -create table t1 as (select * from (values(1),(2),(2),(3),(3),(3),(4),(4),(4),(4)) s(x)); - -statement ok -create table t2 as (select * from (values(1),(3),(3)) t(x)); - -statement ok -create table t3 as (select * from (values(2),(2),(2),(4),(3),(3)) u(x)); - -statement ok -select * from (select * from t1 except all (select * from t2)); - # EXCEPT ALL / INTERSECT ALL query II -select *, count(*) from -(select * from - ((select x from t1 except all (select x from t2)) - intersect all -select x from t3)) group by x order by x; +select x, count(*) as c +from ((select * from (values(1),(2),(2),(3),(3),(3),(4),(4),(4),(4)) s(x) except all select * from (values(1),(3),(3)) t(x)) intersect all select * from (values(2),(2),(2),(4),(3),(3)) u(x)) s +group by x order by x ---- 2 2 3 1