Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
1e68114
add operation converter class
Tmonster Nov 2, 2023
337e73e
it compiles
Tmonster Nov 2, 2023
8c4c3ee
convert intersect and except to logical comparison joins
Tmonster Nov 10, 2023
b461be2
it works. Thanks @lnkuiper
Tmonster Nov 10, 2023
04ce6a3
remove unused variable
Tmonster Nov 10, 2023
8d6e6b6
no unrecognized parameter
Tmonster Nov 13, 2023
193b7d8
tidy fix
Tmonster Nov 13, 2023
77c8bfb
change test file. Figure out this logical execute stuff
Tmonster Nov 13, 2023
aa7cb1c
Merge branch 'feature' into 568-operation-converter-optimizer
Tmonster Nov 13, 2023
7ef936f
remove cout
Tmonster Nov 13, 2023
1a0cc71
fix resolve types error
Tmonster Nov 13, 2023
55c0d05
move test group
Tmonster Nov 13, 2023
4e02cbc
figuring out why this execute is being an issue
Tmonster Nov 13, 2023
ec6e36e
more comments to pick up on
Tmonster Nov 13, 2023
0c2151f
logical execute should still resolve types in case resolveTypes is ca…
Tmonster Nov 14, 2023
7a70a7e
remove unused code
Tmonster Nov 14, 2023
0505a10
tidy fixes
Tmonster Nov 14, 2023
16eda68
clang tidy. more fixes
Tmonster Nov 15, 2023
b7f298b
remove redundant return true
Tmonster Nov 15, 2023
bdc72cc
naming fix
Tmonster Nov 15, 2023
7c1dfb8
Merge branch 'main' into 568-operation-converter-optimizer
Tmonster Nov 23, 2023
08d293e
very lost. Dont know how to push an aggregate on the join. I think I …
Tmonster Nov 23, 2023
9d89e86
just add a logical distinct instead
Tmonster Nov 24, 2023
a3497ad
fix broken tests after adding distinct
Tmonster Nov 27, 2023
63767ec
Merge branch 'main' into 568-operation-converter-optimizer
Tmonster Nov 28, 2023
ade48bf
add header
Tmonster Nov 29, 2023
cd78dcb
Merge branch 'main' into 568-operation-converter-optimizer
Tmonster Dec 4, 2023
6909c88
clean up PR
Tmonster Dec 4, 2023
bff93c3
Merge remote-tracking branch 'upstream/main' into 568-operation-conve…
Tmonster Dec 19, 2023
d4e70d4
try to do everything in the optimizer
Tmonster Dec 19, 2023
4cc6c65
all passes now, need to clean this up and move it to the planning phase
Tmonster Dec 19, 2023
cbb7236
honestly having much more trouble moving this to the planner than I t…
Tmonster Dec 19, 2023
9705af8
very select_list is messing up the results, dont know why
Tmonster Dec 19, 2023
d2829ab
planning and binding is hard to understand
Tmonster Dec 19, 2023
9241df8
think I have fixed more, but still getting some erros regarding flatt…
Tmonster Dec 19, 2023
dfebb87
remove unused code
Tmonster Dec 19, 2023
31ab2a6
more removal of dead code
Tmonster Dec 20, 2023
bd95c58
Merge remote-tracking branch 'upstream/main' into 568-operation-conve…
Tmonster Dec 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 3 additions & 61 deletions src/execution/physical_plan/plan_set_operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,6 @@

namespace duckdb {

// Builds a one-element expression list containing a ROW_NUMBER window expression (BIGINT result)
// whose partitions reference every column in `types` by position.
static vector<unique_ptr<Expression>> CreatePartitionedRowNumExpression(const vector<LogicalType> &types) {
	auto row_number =
	    make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr);
	row_number->start = WindowBoundary::UNBOUNDED_PRECEDING;
	row_number->end = WindowBoundary::UNBOUNDED_FOLLOWING;

	// one partition expression per input column, addressed by its index
	idx_t column_index = 0;
	while (column_index < types.size()) {
		row_number->partitions.push_back(make_uniq<BoundReferenceExpression>(types[column_index], column_index));
		column_index++;
	}

	vector<unique_ptr<Expression>> window_expressions;
	window_expressions.push_back(std::move(row_number));
	return window_expressions;
}

static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t i) {
JoinCondition cond;
cond.left = make_uniq<BoundReferenceExpression>(type, i);
Expand All @@ -43,6 +30,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSetOperati
throw InvalidInputException("Type mismatch for SET OPERATION");
}

// can't switch logical unions to semi/anti join
// also if the operation is an INTERSECT ALL or EXCEPT ALL
switch (op.type) {
case LogicalOperatorType::LOGICAL_UNION:
// UNION
Expand All @@ -51,54 +40,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSetOperati
break;
case LogicalOperatorType::LOGICAL_EXCEPT:
case LogicalOperatorType::LOGICAL_INTERSECT: {
auto &types = left->GetTypes();
vector<JoinCondition> conditions;
// create equality condition for all columns
for (idx_t i = 0; i < types.size(); i++) {
conditions.push_back(CreateNotDistinctComparison(types[i], i));
}
// For EXCEPT ALL / INTERSECT ALL we push a window operator with a ROW_NUMBER into the scans and join to get bag
// semantics.
if (op.setop_all) {
vector<LogicalType> window_types = types;
window_types.push_back(LogicalType::BIGINT);

auto window_left = make_uniq<PhysicalWindow>(window_types, CreatePartitionedRowNumExpression(types),
left->estimated_cardinality);
window_left->children.push_back(std::move(left));
left = std::move(window_left);

auto window_right = make_uniq<PhysicalWindow>(window_types, CreatePartitionedRowNumExpression(types),
right->estimated_cardinality);
window_right->children.push_back(std::move(right));
right = std::move(window_right);

// add window expression result to join condition
conditions.push_back(CreateNotDistinctComparison(LogicalType::BIGINT, types.size()));
// join (created below) now includes the row number result column
op.types.push_back(LogicalType::BIGINT);
}

// EXCEPT is ANTI join
// INTERSECT is SEMI join
PerfectHashJoinStats join_stats; // used in inner joins only

JoinType join_type = op.type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI;
result = make_uniq<PhysicalHashJoin>(op, std::move(left), std::move(right), std::move(conditions), join_type,
op.estimated_cardinality, join_stats);

// For EXCEPT ALL / INTERSECT ALL we need to remove the row number column again
if (op.setop_all) {
vector<unique_ptr<Expression>> projection_select_list;
for (idx_t i = 0; i < types.size(); i++) {
projection_select_list.push_back(make_uniq<BoundReferenceExpression>(types[i], i));
}
auto projection =
make_uniq<PhysicalProjection>(types, std::move(projection_select_list), op.estimated_cardinality);
projection->children.push_back(std::move(result));
result = std::move(projection);
}
break;
throw InternalException("Logical Except/Intersect should have been transformed to semi anti before the physical planning phase");
}
default:
throw InternalException("Unexpected operator type for set operation");
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/planner/operator/logical_execute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class LogicalExecute : public LogicalOperator {

protected:
void ResolveTypes() override {
// take the operator's types from the prepared statement
types = prepared->types;
}
vector<ColumnBinding> GetColumnBindings() override {
return GenerateColumnBindings(0, types.size());
Expand Down
2 changes: 1 addition & 1 deletion src/main/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
namespace duckdb {

#ifdef DEBUG
bool DBConfigOptions::debug_print_bindings = false;
bool DBConfigOptions::debug_print_bindings = true;
#endif

#define DUCKDB_GLOBAL(_PARAM) \
Expand Down
3 changes: 2 additions & 1 deletion src/optimizer/join_order/relation_statistics_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log
}
ret.stats_initialized = true;
ret.filter_strength = 1;
ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name;
ret.table_name =
"(" + child_stats[0].table_name + LogicalOperatorToString(op.type) + child_stats[1].table_name + ")";
for (auto &stats : child_stats) {
// MARK joins are nonreorderable. They won't return initialized stats
// continue in this case.
Expand Down
116 changes: 116 additions & 0 deletions src/planner/binder/query_node/plan_setop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,31 @@
#include "duckdb/planner/expression/bound_cast_expression.hpp"
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
#include "duckdb/planner/operator/logical_projection.hpp"
#include "duckdb/planner/operator/logical_window.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/expression/bound_window_expression.hpp"
#include "duckdb/planner/operator/logical_set_operation.hpp"
#include "duckdb/planner/query_node/bound_set_operation_node.hpp"

namespace duckdb {

// Wraps `op` in a LogicalWindow with table index `window_table_index`. The window holds a single
// ROW_NUMBER expression (BIGINT result) partitioned by every column of `op`, using the column
// bindings and types that `op` reports. `op` becomes the child of the returned window.
static unique_ptr<LogicalWindow> CreateWindowWithPartitionedRowNum(idx_t window_table_index, unique_ptr<LogicalOperator> op) {
	auto result = make_uniq<LogicalWindow>(window_table_index);

	auto row_num_expr =
	    make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr);
	row_num_expr->start = WindowBoundary::UNBOUNDED_PRECEDING;
	row_num_expr->end = WindowBoundary::CURRENT_ROW_ROWS;

	// partition on each child column: pair the i-th type with the i-th binding
	auto child_bindings = op->GetColumnBindings();
	auto child_types = op->types;
	idx_t col_idx = 0;
	while (col_idx < child_types.size()) {
		auto partition_ref = make_uniq<BoundColumnRefExpression>(child_types[col_idx], child_bindings[col_idx]);
		row_num_expr->partitions.push_back(std::move(partition_ref));
		col_idx++;
	}

	result->expressions.push_back(std::move(row_num_expr));
	// the child is attached last, after its bindings and types have been read
	result->AddChild(std::move(op));
	return result;
}

// Optionally push a PROJECTION operator
unique_ptr<LogicalOperator> Binder::CastLogicalOperatorToTypes(vector<LogicalType> &source_types,
vector<LogicalType> &target_types,
Expand Down Expand Up @@ -116,9 +136,105 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundSetOperationNode &node) {
break;
}

// here we convert the set operation to a semi/anti join if required. Using node.setop_all we know which
// conversion we need.
auto root = make_uniq<LogicalSetOperation>(node.setop_index, node.types.size(), std::move(left_node),
std::move(right_node), logical_type, node.setop_all);
root->ResolveOperatorTypes();

unique_ptr<LogicalOperator> op;

// if we have an intersect or except, immediately translate it to a semi or anti join.
// Unions stay as they are.
if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) {
auto &left = root->children[0];
auto &right = root->children[1];
auto left_types = root->children[0]->types;
auto right_types = root->children[1]->types;
auto old_bindings = root->GetColumnBindings();
if (node.setop_all) {
auto window_left_table_id = GenerateTableIndex();
root->children[0] = CreateWindowWithPartitionedRowNum(window_left_table_id, std::move(root->children[0]));

auto window_right_table_id = GenerateTableIndex();
root->children[1] = CreateWindowWithPartitionedRowNum(window_right_table_id, std::move(root->children[1]));

root->types.push_back(LogicalType::BIGINT);
root->column_count += 1;
}

auto left_bindings = left->GetColumnBindings();
auto right_bindings = right->GetColumnBindings();
D_ASSERT(left_bindings.size() == right_bindings.size());

vector<JoinCondition> conditions;
// create equality condition for all columns
idx_t binding_offset = node.setop_all ? 1 : 0;
for (idx_t i = 0; i < left_bindings.size() - binding_offset; i++) {
auto cond_type_left = LogicalType(LogicalType::UNKNOWN);
auto cond_type_right = LogicalType(LogicalType::UNKNOWN);
JoinCondition cond;
cond.left = make_uniq<BoundColumnRefExpression>(left_types[i], left_bindings[i]);
cond.right = make_uniq<BoundColumnRefExpression>(right_types[i], right_bindings[i]);
cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
conditions.push_back(std::move(cond));
}

// create condition for the row number as well.
if (node.setop_all) {
JoinCondition cond;
cond.left =
make_uniq<BoundColumnRefExpression>(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]);
cond.right =
make_uniq<BoundColumnRefExpression>(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]);
cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
conditions.push_back(std::move(cond));
}

JoinType join_type = root->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI;

auto join_op = make_uniq<LogicalComparisonJoin>(join_type);
join_op->children.push_back(std::move(left));
join_op->children.push_back(std::move(right));
join_op->conditions = std::move(conditions);
join_op->ResolveOperatorTypes();

op = std::move(join_op);

// create projection to remove row_id.
if (node.setop_all) {
vector<unique_ptr<Expression>> projection_select_list;
auto bindings = op->GetColumnBindings();
for (idx_t i = 0; i < bindings.size() - 1; i++) {
projection_select_list.push_back(make_uniq<BoundColumnRefExpression>(op->types[i], bindings[i]));
}
auto projection =
make_uniq<LogicalProjection>(node.setop_index, std::move(projection_select_list));
projection->children.push_back(std::move(op));
op = std::move(projection);
}

if (!node.setop_all) {
// push a distinct operator on the join
auto &types = op->types;
auto join_bindings = op->GetColumnBindings();
vector<unique_ptr<Expression>> distinct_targets;
vector<unique_ptr<Expression>> select_list;
for (idx_t i = 0; i < join_bindings.size(); i++) {
distinct_targets.push_back(make_uniq<BoundColumnRefExpression>(types[i], join_bindings[i]));
select_list.push_back(make_uniq<BoundColumnRefExpression>(types[i], join_bindings[i]));
}
auto distinct = make_uniq<LogicalDistinct>(std::move(distinct_targets), DistinctType::DISTINCT);
distinct->children.push_back(std::move(op));
op = std::move(distinct);

auto projection = make_uniq<LogicalProjection>(node.setop_index, std::move(select_list));
projection->children.push_back(std::move(op));
op = std::move(projection);
op->ResolveOperatorTypes();
}
return VisitQueryNode(node, std::move(op));
}
return VisitQueryNode(node, std::move(root));
}

Expand Down
13 changes: 13 additions & 0 deletions test/optimizer/setops/operation_converter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# name: test/optimizer/setops/operation_converter.test
# description: converting intersect/except to semi anti
# group: [setops]

statement ok
create table left_table as select range as a from range(100);

statement ok
create table right_table as select range*2 as b from range(10000);

statement ok
select * from left_table intersect select * from right_table;

Original file line number Diff line number Diff line change
@@ -1,68 +1,68 @@
# name: test/optimizer/pushdown_set_op.test
# name: test/optimizer/setops/pushdown_set_op.test
# description: Pushdown set operations
# group: [optimizer]
# group: [setops]

statement ok
PRAGMA explain_output = 'OPTIMIZED_ONLY'

query II
explain select 42 intersect select 42;
----
logical_opt <REGEX>:.*INTERSECT.*
logical_opt <REGEX>:.*SEMI.*

# intersect is empty if either side is empty
query II
explain select 42 intersect select 42 where 1=0;
----
logical_opt <!REGEX>:.*INTERSECT.*
logical_opt <!REGEX>:.*SEMI.*

query II
explain select 42 where 1=0 intersect select 42;
----
logical_opt <!REGEX>:.*INTERSECT.*
logical_opt <!REGEX>:.*SEMI.*

# except is empty if LHS is empty
query II
explain select 42 where 1=0 except select 42;
----
logical_opt <!REGEX>:.*EXCEPT.*
logical_opt <!REGEX>:.*ANTI.*

# if RHS is empty we can optimize away the except
query II
explain select 42 except select 42 where 1=0;
----
logical_opt <!REGEX>:.*EXCEPT.*
logical_opt <!REGEX>:.*ANTI.*

# now pushdown subquery with set ops
query II
explain select * from (select 42 intersect select 42) tbl(i) where i=42;
----
logical_opt <REGEX>:.*INTERSECT.*
logical_opt <REGEX>:.*SEMI.*

query II
explain select * from (select 42 intersect select 43) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*INTERSECT.*
logical_opt <!REGEX>:.*SEMI.*

query II
explain select * from (select 43 intersect select 42) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*INTERSECT.*
logical_opt <!REGEX>:.*SEMI.*

query II
explain select * from (select 42 except select 42) tbl(i) where i=42;
----
logical_opt <REGEX>:.*EXCEPT.*
logical_opt <REGEX>:.*ANTI.*

query II
explain select * from (select 42 except select 43) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*EXCEPT.*
logical_opt <!REGEX>:.*ANTI.*

query II
explain select * from (select 43 except select 42) tbl(i) where i=42;
----
logical_opt <!REGEX>:.*EXCEPT.*
logical_opt <!REGEX>:.*ANTI.*

query I
select 42 intersect select 42;
Expand Down
Loading