Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/execution/expression_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ void ExpressionExecutor::ExecuteExpression(DataChunk &input, Vector &result) {
ExecuteExpression(result);
}

//! Evaluates the executor's single boolean expression over `input` and fills `sel` with the selected rows.
//! `mask` is handed through to Select untouched; it may be empty.
//! Returns the number of selected tuples.
idx_t ExpressionExecutor::SelectExpression(DataChunk &input, SelectionVector &sel, optional_ptr<ValidityMask> mask) {
	// this entry point only makes sense when the executor holds exactly one expression
	D_ASSERT(expressions.size() == 1);
	SetChunk(&input);

	auto &root_expr = *expressions[0];
	auto *root_state = states[0]->root_state.get();
	// no input selection and no false-selection are supplied: only the matching rows are collected
	return Select(root_expr, root_state, nullptr, input.size(), &sel, nullptr, mask);
}

Expand Down Expand Up @@ -223,7 +224,8 @@ void ExpressionExecutor::Execute(const Expression &expr, ExpressionState *state,
}

idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel,
idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) {
idx_t count, SelectionVector *true_sel, SelectionVector *false_sel,
optional_ptr<ValidityMask> mask) {
if (count == 0) {
return 0;
}
Expand All @@ -233,9 +235,9 @@ idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state,
case ExpressionClass::BOUND_BETWEEN:
return Select(expr.Cast<BoundBetweenExpression>(), state, sel, count, true_sel, false_sel);
case ExpressionClass::BOUND_COMPARISON:
return Select(expr.Cast<BoundComparisonExpression>(), state, sel, count, true_sel, false_sel);
return Select(expr.Cast<BoundComparisonExpression>(), state, sel, count, true_sel, false_sel, mask);
case ExpressionClass::BOUND_CONJUNCTION:
return Select(expr.Cast<BoundConjunctionExpression>(), state, sel, count, true_sel, false_sel);
return Select(expr.Cast<BoundConjunctionExpression>(), state, sel, count, true_sel, false_sel, mask);
default:
return DefaultSelect(expr, state, sel, count, true_sel, false_sel);
}
Expand Down
29 changes: 14 additions & 15 deletions src/execution/expression_executor/execute_comparison.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#include "duckdb/common/operator/comparison_operators.hpp"
#include "duckdb/common/uhugeint.hpp"
#include "duckdb/common/vector_operations/binary_executor.hpp"
#include "duckdb/common/vector_operations/vector_operations.hpp"
#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
#include "duckdb/common/operator/comparison_operators.hpp"
#include "duckdb/common/vector_operations/binary_executor.hpp"

#include <algorithm>

namespace duckdb {

Expand Down Expand Up @@ -290,17 +288,18 @@ static idx_t NestedSelectOperation(Vector &left, Vector &right, optional_ptr<con
// Handle NULL nested values
Vector l_not_null(left);
Vector r_not_null(right);

auto match_count = SelectNotNull(l_not_null, r_not_null, count, *sel, maybe_vec, false_opt, null_mask);
auto no_match_count = count - match_count;
count = match_count;

// Now that we have handled the NULLs, we can use the recursive nested comparator for the rest.
match_count =
NestedSelector::Select<OP>(l_not_null, r_not_null, &maybe_vec, count, optional_ptr<SelectionVector>(true_opt),
optional_ptr<SelectionVector>(false_opt), null_mask);
match_count = NestedSelector::Select<OP>(l_not_null, r_not_null, &maybe_vec, match_count,
optional_ptr<SelectionVector>(true_opt),
optional_ptr<SelectionVector>(false_opt), null_mask);
no_match_count += (count - match_count);

// match_count = 0;

// Copy the buffered selections to the output selections
ScatterSelection(true_sel, match_count, true_vec);
ScatterSelection(false_sel, no_match_count, false_vec);
Expand Down Expand Up @@ -347,7 +346,7 @@ idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, optional_ptr

idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state,
const SelectionVector *sel, idx_t count, SelectionVector *true_sel,
SelectionVector *false_sel) {
SelectionVector *false_sel, optional_ptr<ValidityMask> mask) {
// resolve the children
state->intermediate_chunk.Reset();
auto &left = state->intermediate_chunk.data[0];
Expand All @@ -358,17 +357,17 @@ idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, Expressi

switch (expr.type) {
case ExpressionType::COMPARE_EQUAL:
return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel);
return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_NOTEQUAL:
return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel);
return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_LESSTHAN:
return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_GREATERTHAN:
return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel);
return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel);
return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel, mask);
case ExpressionType::COMPARE_DISTINCT_FROM:
return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, false_sel);
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
Expand Down
4 changes: 2 additions & 2 deletions src/execution/expression_executor/execute_conjunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void ExpressionExecutor::Execute(const BoundConjunctionExpression &expr, Express

idx_t ExpressionExecutor::Select(const BoundConjunctionExpression &expr, ExpressionState *state_p,
const SelectionVector *sel, idx_t count, SelectionVector *true_sel,
SelectionVector *false_sel) {
SelectionVector *false_sel, optional_ptr<ValidityMask> mask) {
auto &state = state_p->Cast<ConjunctionState>();

if (expr.type == ExpressionType::CONJUNCTION_AND) {
Expand All @@ -75,7 +75,7 @@ idx_t ExpressionExecutor::Select(const BoundConjunctionExpression &expr, Express
for (idx_t i = 0; i < expr.children.size(); i++) {
idx_t tcount = Select(*expr.children[state.adaptive_filter->permutation[i]],
state.child_states[state.adaptive_filter->permutation[i]].get(), current_sel,
current_count, true_sel, temp_false.get());
current_count, true_sel, temp_false.get(), mask);
idx_t fcount = current_count - tcount;
if (fcount > 0 && false_sel) {
// move failing tuples into the false_sel
Expand Down
83 changes: 77 additions & 6 deletions src/execution/operator/filter/physical_filter.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
#include "duckdb/execution/operator/filter/physical_filter.hpp"

#include "duckdb/execution/expression_executor.hpp"
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
#include "duckdb/parallel/thread_context.hpp"
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"

#include <duckdb/planner/expression/bound_reference_expression.hpp>
#include <duckdb/planner/expression_iterator.hpp>
namespace duckdb {

PhysicalFilter::PhysicalFilter(vector<LogicalType> types, vector<unique_ptr<Expression>> select_list,
Expand Down Expand Up @@ -39,15 +43,82 @@ unique_ptr<OperatorState> PhysicalFilter::GetOperatorState(ExecutionContext &con
return make_uniq<FilterState>(context, *expression);
}

//! Returns true if any expression below `expr` (at any depth) is a column reference whose return type is nested.
//! `expr` itself is not inspected, only its children and their descendants.
static bool ExprHasNestedType(const Expression &expr) {
	bool found_nested = false;
	ExpressionIterator::EnumerateChildren(expr, [&](const Expression &child) {
		if (found_nested) {
			// an earlier sibling already matched: nothing this child could change
			return;
		}
		const auto child_class = child.expression_class;
		if (child_class == ExpressionClass::BOUND_REF) {
			const auto &ref_type = child.Cast<BoundReferenceExpression>().return_type;
			// UNION is excluded on purpose.
			// NOTE(review): the exclusion rests on the assumption that a UNION stores NULL for its unused
			// members, which would make the validity mask report NULL for every row - confirm.
			found_nested = ref_type.IsNested() && ref_type.id() != LogicalTypeId::UNION;
		} else if (child_class == ExpressionClass::BOUND_COLUMN_REF) {
			// NOTE(review): unlike BOUND_REF, UNION is not excluded here - confirm this is intended
			found_nested = child.Cast<BoundColumnRefExpression>().return_type.IsNested();
		}
		if (!found_nested) {
			// the child itself did not match: look further down its own subtree
			found_nested = ExprHasNestedType(child);
		}
	});
	return found_nested;
}

//! Applies the filter expression to `input` and exposes the surviving rows through `chunk`.
//!
//! The selection is run with a ValidityMask sized to the input chunk. When the filter references a nested-type
//! column (see ExprHasNestedType) and the mask reports at least one invalid row, selected rows whose mask entry
//! is invalid are dropped from the output.
//! NOTE(review): how the mask is populated is decided by the comparison operators, which are not visible here;
//! presumably an invalid entry marks a row whose nested comparison evaluated to NULL - confirm.
//!
//! Always returns NEED_MORE_INPUT: each input chunk is consumed in a single call.
OperatorResultType PhysicalFilter::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk,
                                                   GlobalOperatorState &gstate, OperatorState &state_p) const {
	auto &state = state_p.Cast<FilterState>();

	// one mask entry per input row
	ValidityMask mask(input.size());
	const idx_t match_count = state.executor.SelectExpression(input, state.sel, mask);

	// Walking the expression trees is only worthwhile if the mask can actually remove a row.
	bool run_nested_check = false;
	if (!mask.AllValid()) {
		for (auto &expr : state.executor.expressions) {
			if (ExprHasNestedType(*expr)) {
				run_nested_check = true;
				break;
			}
		}
	}

	if (!run_nested_check) {
		if (match_count == input.size()) {
			// nothing was filtered: skip adding any selection vectors
			chunk.Reference(input);
		} else {
			chunk.Slice(input, state.sel, match_count);
		}
		return OperatorResultType::NEED_MORE_INPUT;
	}

	// First pass: count the selected rows that are also valid in the mask.
	idx_t valid_count = 0;
	for (idx_t i = 0; i < match_count; i++) {
		if (mask.RowIsValid(state.sel.get_index(i))) {
			valid_count++;
		}
	}

	if (valid_count == match_count) {
		// every selected row is valid: the selection can be used as-is
		chunk.Slice(input, state.sel, match_count);
		return OperatorResultType::NEED_MORE_INPUT;
	}

	// Second pass: keep only the valid rows. The mask must be addressed by the selected row index, exactly as
	// in the counting pass; addressing it by the position `i` would let the two passes disagree and write a
	// different number of entries than the `valid_count` slots allocated here.
	SelectionVector valid_sel(valid_count);
	idx_t out_idx = 0;
	for (idx_t i = 0; i < match_count; i++) {
		const idx_t row_idx = state.sel.get_index(i);
		if (mask.RowIsValid(row_idx)) {
			valid_sel.set_index(out_idx, row_idx);
			out_idx++;
		}
	}
	chunk.Slice(input, valid_sel, valid_count);
	return OperatorResultType::NEED_MORE_INPUT;
}
Expand Down
10 changes: 6 additions & 4 deletions src/include/duckdb/execution/expression_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ class ExpressionExecutor {
DUCKDB_API void ExecuteExpression(Vector &result);
//! Execute the ExpressionExecutor and generate a selection vector from all true values in the result; this should
//! only be used with a single boolean expression
DUCKDB_API idx_t SelectExpression(DataChunk &input, SelectionVector &sel);
DUCKDB_API idx_t SelectExpression(DataChunk &input, SelectionVector &sel,
optional_ptr<ValidityMask> mask = nullptr);

//! Execute the expression with index `expr_idx` and store the result in the result vector
DUCKDB_API void ExecuteExpression(idx_t expr_idx, Vector &result);
Expand Down Expand Up @@ -133,16 +134,17 @@ class ExpressionExecutor {
//! Execute the (boolean-returning) expression and generate a selection vector with all entries that are "true" in
//! the result
idx_t Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);
SelectionVector *true_sel, SelectionVector *false_sel, optional_ptr<ValidityMask> mask = nullptr);
idx_t DefaultSelect(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);

idx_t Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);
idx_t Select(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);
SelectionVector *true_sel, SelectionVector *false_sel, optional_ptr<ValidityMask> mask = nullptr);
idx_t Select(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel,
idx_t count, SelectionVector *true_sel, SelectionVector *false_sel);
idx_t count, SelectionVector *true_sel, SelectionVector *false_sel,
optional_ptr<ValidityMask> mask = nullptr);

//! Verify that the output of a step in the ExpressionExecutor is correct
void Verify(const Expression &expr, Vector &result, idx_t count);
Expand Down
3 changes: 3 additions & 0 deletions test/optimizer/expression_rewriter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# name: test/optimizer/expression_rewriter.test
# group: [optimizer]

8 changes: 4 additions & 4 deletions test/sql/join/inner/test_unequal_join.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ statement ok
CREATE TABLE test (a INTEGER, b INTEGER);

statement ok
INSERT INTO test VALUES (11, 1), (12, 2), (13, 3)
INSERT INTO test VALUES (11, 1), (12, 2), (13, 3);

statement ok
CREATE TABLE test2 (b INTEGER, c INTEGER);

statement ok
INSERT INTO test2 VALUES (1, 10), (1, 20), (2, 30)
INSERT INTO test2 VALUES (1, 10), (1, 20), (2, 30);

# inequality join
query II
Expand All @@ -39,10 +39,10 @@ SELECT test.b, test2.b FROM test, test2 WHERE test.b <> test2.b AND test.b <> 1
3 1

statement ok
INSERT INTO test VALUES (NULL, NULL)
INSERT INTO test VALUES (NULL, NULL);

statement ok
INSERT INTO test2 VALUES (NULL, NULL)
INSERT INTO test2 VALUES (NULL, NULL);

# inequality join with NULL values
query II
Expand Down
3 changes: 1 addition & 2 deletions test/sql/join/test_nested_keys.test
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,9 @@ CREATE VIEW intlists AS SELECT * FROM (VALUES
) lv(i, fk);

query III
SELECT i, pk, fk FROM intlistdim, intlists WHERE pk = fk ORDER BY i
SELECT i, pk, fk FROM intlistdim, intlists WHERE pk = fk ORDER BY i;
----
1 [1] [1]
2 [NULL] [NULL]
3 [] []
5 [9, 10, 11] [9, 10, 11]

Expand Down
3 changes: 3 additions & 0 deletions test/sql/types/list/nested_list_extract.test
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ SELECT * FROM a WHERE b[1][1]=1
----
0 [[1, 2], NULL, [3, NULL]]


mode skip

query II
SELECT * FROM a WHERE b[1][1]=1 OR b[1][2]=2
----
Expand Down
1 change: 1 addition & 0 deletions test/sql/types/nested/array/array_misc.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ SELECT DISTINCT a FROM arrays WHERE a[1] > 0 ORDER BY ALL
[4, NULL, 2]
[7, 8, 9]


query I
SELECT * FROM (
SELECT a FROM ARRAYS
Expand Down
59 changes: 59 additions & 0 deletions test/sql/types/struct/unnamed_struct_comparison.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# name: test/sql/types/struct/unnamed_struct_comparison.test
# description: Test comparisons of unnamed structs and lists that contain NULL values
# group: [struct]

statement ok
pragma enable_verification

query I
select a<>b is null from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b);
----
true


query I
select 1 from values (struct_pack(k := NULL)) t(a) where 1 <> a.k;
----

query I
select [NULL, 6] <> [6, 5];
----
NULL


query I
select 1 from VALUES ([NULL, 6], [5, 6]) t(a, b) where a<>b;
----

mode skip


query I
select 1 from VALUES ([NULL, 1, NULL], [5, 6, 7]) t(a, b) where a=b;
----


query I
select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where a<>b;
----


query I
select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where a<>b is null;
----
1

query I
select a<>b is null from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where NULL;
----


query I
select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7), (NULL, 2), (4, 5)) t(a, b, c, d) where a<>b and c<>d;
----


query I
select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7), (NULL, 2), (4, 5)) t(a, b, c, d) where a<>b is null and c<>d is null;
----
1