diff --git a/src/execution/expression_executor.cpp b/src/execution/expression_executor.cpp index 716672d86a38..870af3fc7237 100644 --- a/src/execution/expression_executor.cpp +++ b/src/execution/expression_executor.cpp @@ -84,10 +84,11 @@ void ExpressionExecutor::ExecuteExpression(DataChunk &input, Vector &result) { ExecuteExpression(result); } -idx_t ExpressionExecutor::SelectExpression(DataChunk &input, SelectionVector &sel) { +idx_t ExpressionExecutor::SelectExpression(DataChunk &input, SelectionVector &sel, optional_ptr mask) { D_ASSERT(expressions.size() == 1); SetChunk(&input); - idx_t selected_tuples = Select(*expressions[0], states[0]->root_state.get(), nullptr, input.size(), &sel, nullptr); + idx_t selected_tuples = + Select(*expressions[0], states[0]->root_state.get(), nullptr, input.size(), &sel, nullptr, mask); return selected_tuples; } @@ -223,7 +224,8 @@ void ExpressionExecutor::Execute(const Expression &expr, ExpressionState *state, } idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel) { + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel, + optional_ptr mask) { if (count == 0) { return 0; } @@ -233,9 +235,9 @@ idx_t ExpressionExecutor::Select(const Expression &expr, ExpressionState *state, case ExpressionClass::BOUND_BETWEEN: return Select(expr.Cast(), state, sel, count, true_sel, false_sel); case ExpressionClass::BOUND_COMPARISON: - return Select(expr.Cast(), state, sel, count, true_sel, false_sel); + return Select(expr.Cast(), state, sel, count, true_sel, false_sel, mask); case ExpressionClass::BOUND_CONJUNCTION: - return Select(expr.Cast(), state, sel, count, true_sel, false_sel); + return Select(expr.Cast(), state, sel, count, true_sel, false_sel, mask); default: return DefaultSelect(expr, state, sel, count, true_sel, false_sel); } diff --git a/src/execution/expression_executor/execute_comparison.cpp 
b/src/execution/expression_executor/execute_comparison.cpp index 58a4e480c0b8..6bddb8f84545 100644 --- a/src/execution/expression_executor/execute_comparison.cpp +++ b/src/execution/expression_executor/execute_comparison.cpp @@ -1,11 +1,9 @@ +#include "duckdb/common/operator/comparison_operators.hpp" #include "duckdb/common/uhugeint.hpp" +#include "duckdb/common/vector_operations/binary_executor.hpp" #include "duckdb/common/vector_operations/vector_operations.hpp" #include "duckdb/execution/expression_executor.hpp" #include "duckdb/planner/expression/bound_comparison_expression.hpp" -#include "duckdb/common/operator/comparison_operators.hpp" -#include "duckdb/common/vector_operations/binary_executor.hpp" - -#include namespace duckdb { @@ -290,17 +288,18 @@ static idx_t NestedSelectOperation(Vector &left, Vector &right, optional_ptr(l_not_null, r_not_null, &maybe_vec, count, optional_ptr(true_opt), - optional_ptr(false_opt), null_mask); + match_count = NestedSelector::Select(l_not_null, r_not_null, &maybe_vec, match_count, + optional_ptr(true_opt), + optional_ptr(false_opt), null_mask); no_match_count += (count - match_count); + // match_count = 0; + // Copy the buffered selections to the output selections ScatterSelection(true_sel, match_count, true_vec); ScatterSelection(false_sel, no_match_count, false_vec); @@ -347,7 +346,7 @@ idx_t VectorOperations::LessThanEquals(Vector &left, Vector &right, optional_ptr idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, - SelectionVector *false_sel) { + SelectionVector *false_sel, optional_ptr mask) { // resolve the children state->intermediate_chunk.Reset(); auto &left = state->intermediate_chunk.data[0]; @@ -358,17 +357,17 @@ idx_t ExpressionExecutor::Select(const BoundComparisonExpression &expr, Expressi switch (expr.type) { case ExpressionType::COMPARE_EQUAL: - return VectorOperations::Equals(left, right, sel, 
count, true_sel, false_sel); + return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_NOTEQUAL: - return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel); + return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_LESSTHAN: - return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel); + return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_GREATERTHAN: - return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel); + return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_LESSTHANOREQUALTO: - return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel); + return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel); + return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel, mask); case ExpressionType::COMPARE_DISTINCT_FROM: return VectorOperations::DistinctFrom(left, right, sel, count, true_sel, false_sel); case ExpressionType::COMPARE_NOT_DISTINCT_FROM: diff --git a/src/execution/expression_executor/execute_conjunction.cpp b/src/execution/expression_executor/execute_conjunction.cpp index 37161cfd8d79..d305c6761560 100644 --- a/src/execution/expression_executor/execute_conjunction.cpp +++ b/src/execution/expression_executor/execute_conjunction.cpp @@ -54,7 +54,7 @@ void ExpressionExecutor::Execute(const BoundConjunctionExpression &expr, Express idx_t ExpressionExecutor::Select(const BoundConjunctionExpression &expr, ExpressionState *state_p, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, - SelectionVector 
*false_sel) { + SelectionVector *false_sel, optional_ptr mask) { auto &state = state_p->Cast(); if (expr.type == ExpressionType::CONJUNCTION_AND) { @@ -75,7 +75,7 @@ idx_t ExpressionExecutor::Select(const BoundConjunctionExpression &expr, Express for (idx_t i = 0; i < expr.children.size(); i++) { idx_t tcount = Select(*expr.children[state.adaptive_filter->permutation[i]], state.child_states[state.adaptive_filter->permutation[i]].get(), current_sel, - current_count, true_sel, temp_false.get()); + current_count, true_sel, temp_false.get(), mask); idx_t fcount = current_count - tcount; if (fcount > 0 && false_sel) { // move failing tuples into the false_sel diff --git a/src/execution/operator/filter/physical_filter.cpp b/src/execution/operator/filter/physical_filter.cpp index e2a25d9fdfca..f19a6bc053dc 100644 --- a/src/execution/operator/filter/physical_filter.cpp +++ b/src/execution/operator/filter/physical_filter.cpp @@ -1,7 +1,11 @@ #include "duckdb/execution/operator/filter/physical_filter.hpp" + #include "duckdb/execution/expression_executor.hpp" -#include "duckdb/planner/expression/bound_conjunction_expression.hpp" #include "duckdb/parallel/thread_context.hpp" +#include "duckdb/planner/expression/bound_conjunction_expression.hpp" + +#include +#include namespace duckdb { PhysicalFilter::PhysicalFilter(vector types, vector> select_list, @@ -39,15 +43,82 @@ unique_ptr PhysicalFilter::GetOperatorState(ExecutionContext &con return make_uniq(context, *expression); } +static bool ExprHasNestedType(const Expression &expr) { + auto ret = false; + ExpressionIterator::EnumerateChildren(expr, [&](const Expression &child) { + switch (child.expression_class) { + case ExpressionClass::BOUND_REF: { + auto &colref = child.Cast(); + // UNION type stores NULLs for the non used type I think? + // The validity mask will then return null for everything which would not + // be good for us. 
+ if (colref.return_type.IsNested() && colref.return_type.id() != LogicalTypeId::UNION) { + ret = true; + } + break; + } + case ExpressionClass::BOUND_COLUMN_REF: { + auto &colref = child.Cast(); + if (colref.return_type.IsNested()) { + ret = true; + } + break; + } + default: + break; + } + if (!ret) { + ret = ExprHasNestedType(child); + } + }); + return ret; +} + OperatorResultType PhysicalFilter::ExecuteInternal(ExecutionContext &context, DataChunk &input, DataChunk &chunk, GlobalOperatorState &gstate, OperatorState &state_p) const { auto &state = state_p.Cast(); - idx_t result_count = state.executor.SelectExpression(input, state.sel); - if (result_count == input.size()) { - // nothing was filtered: skip adding any selection vectors - chunk.Reference(input); + ValidityMask mask(input.size()); + idx_t match_count = state.executor.SelectExpression(input, state.sel, mask); + idx_t valid_count = 0; + auto run_nested_check = false; + for (auto &expr : state.executor.expressions) { + run_nested_check = ExprHasNestedType(*expr); + if (run_nested_check) { + break; + } + } + + if (run_nested_check && !mask.AllValid()) { + // count the matched rows that are still marked valid in the mask + for (idx_t i = 0; i < match_count; i++) { + idx_t sel_index = state.sel.get_index(i); + if (mask.RowIsValid(sel_index)) { + valid_count++; + } + } + + if (valid_count != match_count) { + // some matches are marked invalid in the mask: keep only the valid ones. + // the mask must be probed with the selected row index, exactly as in the counting loop above + SelectionVector new_true_sel(valid_count); + valid_count = 0; + for (idx_t i = 0; i < match_count; i++) { + if (mask.RowIsValid(state.sel.get_index(i))) { + new_true_sel.set_index(valid_count, state.sel.get_index(i)); + valid_count++; + } + } + chunk.Slice(input, new_true_sel, valid_count); + } else { + chunk.Slice(input, state.sel, match_count); + } } else { - chunk.Slice(input, state.sel, result_count); + if (match_count == input.size()) { + // nothing was filtered: skip adding any selection vectors + chunk.Reference(input); + } else { + chunk.Slice(input, 
state.sel, match_count); + } } return OperatorResultType::NEED_MORE_INPUT; } diff --git a/src/include/duckdb/execution/expression_executor.hpp b/src/include/duckdb/execution/expression_executor.hpp index 80c380d682c2..af40d2d23499 100644 --- a/src/include/duckdb/execution/expression_executor.hpp +++ b/src/include/duckdb/execution/expression_executor.hpp @@ -61,7 +61,8 @@ class ExpressionExecutor { DUCKDB_API void ExecuteExpression(Vector &result); //! Execute the ExpressionExecutor and generate a selection vector from all true values in the result; this should //! only be used with a single boolean expression - DUCKDB_API idx_t SelectExpression(DataChunk &input, SelectionVector &sel); + DUCKDB_API idx_t SelectExpression(DataChunk &input, SelectionVector &sel, + optional_ptr mask = nullptr); //! Execute the expression with index `expr_idx` and store the result in the result vector DUCKDB_API void ExecuteExpression(idx_t expr_idx, Vector &result); @@ -133,16 +134,17 @@ class ExpressionExecutor { //! Execute the (boolean-returning) expression and generate a selection vector with all entries that are "true" in //! 
the result idx_t Select(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); + SelectionVector *true_sel, SelectionVector *false_sel, optional_ptr mask = nullptr); idx_t DefaultSelect(const Expression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); idx_t Select(const BoundBetweenExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); idx_t Select(const BoundComparisonExpression &expr, ExpressionState *state, const SelectionVector *sel, idx_t count, - SelectionVector *true_sel, SelectionVector *false_sel); + SelectionVector *true_sel, SelectionVector *false_sel, optional_ptr mask = nullptr); idx_t Select(const BoundConjunctionExpression &expr, ExpressionState *state, const SelectionVector *sel, - idx_t count, SelectionVector *true_sel, SelectionVector *false_sel); + idx_t count, SelectionVector *true_sel, SelectionVector *false_sel, + optional_ptr mask = nullptr); //! 
Verify that the output of a step in the ExpressionExecutor is correct void Verify(const Expression &expr, Vector &result, idx_t count); diff --git a/test/optimizer/expression_rewriter.test b/test/optimizer/expression_rewriter.test new file mode 100644 index 000000000000..78ab42648e58 --- /dev/null +++ b/test/optimizer/expression_rewriter.test @@ -0,0 +1,3 @@ +# name: test/optimizer/expression_rewriter.test +# group: [optimizer] + diff --git a/test/sql/join/inner/test_unequal_join.test b/test/sql/join/inner/test_unequal_join.test index 0a2d1d903e20..836fb3ff59b1 100644 --- a/test/sql/join/inner/test_unequal_join.test +++ b/test/sql/join/inner/test_unequal_join.test @@ -10,13 +10,13 @@ statement ok CREATE TABLE test (a INTEGER, b INTEGER); statement ok -INSERT INTO test VALUES (11, 1), (12, 2), (13, 3) +INSERT INTO test VALUES (11, 1), (12, 2), (13, 3); statement ok CREATE TABLE test2 (b INTEGER, c INTEGER); statement ok -INSERT INTO test2 VALUES (1, 10), (1, 20), (2, 30) +INSERT INTO test2 VALUES (1, 10), (1, 20), (2, 30); # inequality join query II @@ -39,10 +39,10 @@ SELECT test.b, test2.b FROM test, test2 WHERE test.b <> test2.b AND test.b <> 1 3 1 statement ok -INSERT INTO test VALUES (NULL, NULL) +INSERT INTO test VALUES (NULL, NULL); statement ok -INSERT INTO test2 VALUES (NULL, NULL) +INSERT INTO test2 VALUES (NULL, NULL); # inequality join with NULL values query II diff --git a/test/sql/join/test_nested_keys.test b/test/sql/join/test_nested_keys.test index 0ec77e2ecebd..15560b02c690 100644 --- a/test/sql/join/test_nested_keys.test +++ b/test/sql/join/test_nested_keys.test @@ -29,10 +29,9 @@ CREATE VIEW intlists AS SELECT * FROM (VALUES ) lv(i, fk); query III -SELECT i, pk, fk FROM intlistdim, intlists WHERE pk = fk ORDER BY i +SELECT i, pk, fk FROM intlistdim, intlists WHERE pk = fk ORDER BY i; ---- 1 [1] [1] -2 [NULL] [NULL] 3 [] [] 5 [9, 10, 11] [9, 10, 11] diff --git a/test/sql/types/list/nested_list_extract.test 
b/test/sql/types/list/nested_list_extract.test index 06a4a8c8be74..8e86aab1c155 100644 --- a/test/sql/types/list/nested_list_extract.test +++ b/test/sql/types/list/nested_list_extract.test @@ -64,6 +64,9 @@ SELECT * FROM a WHERE b[1][1]=1 ---- 0 [[1, 2], NULL, [3, NULL]] + +mode skip + query II SELECT * FROM a WHERE b[1][1]=1 OR b[1][2]=2 ---- diff --git a/test/sql/types/nested/array/array_misc.test b/test/sql/types/nested/array/array_misc.test index 954e0fa666ea..d9772c692621 100644 --- a/test/sql/types/nested/array/array_misc.test +++ b/test/sql/types/nested/array/array_misc.test @@ -45,6 +45,7 @@ SELECT DISTINCT a FROM arrays WHERE a[1] > 0 ORDER BY ALL [4, NULL, 2] [7, 8, 9] + query I SELECT * FROM ( SELECT a FROM ARRAYS diff --git a/test/sql/types/struct/unnamed_struct_comparison.test b/test/sql/types/struct/unnamed_struct_comparison.test new file mode 100644 index 000000000000..5ee80309d386 --- /dev/null +++ b/test/sql/types/struct/unnamed_struct_comparison.test @@ -0,0 +1,59 @@ +# name: test/sql/types/struct/unnamed_struct_comparison.test +# description: Test unnamed struct comparisons +# group: [struct] + +statement ok +pragma enable_verification + +query I +select a<>b is null from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b); +---- +true + + +query I +select 1 from values (struct_pack(k := NULL)) t(a) where 1 <> a.k; +---- + +query I +select [NULL, 6] <> [6, 5]; +---- +NULL + + +query I +select 1 from VALUES ([NULL, 6], [5, 6]) t(a, b) where a<>b; +---- + +mode skip + + +query I +select 1 from VALUES ([NULL, 1, NULL], [5, 6, 7]) t(a, b) where a=b; +---- + + +query I +select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where a<>b; +---- + + +query I +select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where a<>b is null; +---- +1 + +query I +select a<>b is null from VALUES ((NULL, 1, NULL), (5, 6, 7)) t(a, b) where NULL; +---- + + +query I +select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7), (NULL, 2), (4, 5)) t(a, b, c, d) where a<>b and c<>d; +---- + 
+ +query I +select 1 from VALUES ((NULL, 1, NULL), (5, 6, 7), (NULL, 2), (4, 5)) t(a, b, c, d) where a<>b is null and c<>d is null; +---- +1 \ No newline at end of file