From 3c5aea1fa2745532ba9f2ba7e37115d1ceb86a40 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 13:12:59 +0800 Subject: [PATCH 1/9] support nulleq in tiflash Signed-off-by: xufei --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 15 +- .../tests/gtest_tidb_null_eq_func.cpp | 40 +++++ dbms/src/Functions/FunctionsComparison.cpp | 165 ++++++++++++++++++ .../Functions/tests/gtest_tidb_null_eq.cpp | 93 ++++++++++ 4 files changed, 306 insertions(+), 7 deletions(-) create mode 100644 dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp create mode 100644 dbms/src/Functions/tests/gtest_tidb_null_eq.cpp diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index fd9aff4bb8f..37355ba2109 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -216,13 +216,14 @@ const std::unordered_map scalar_func_map({ //{tipb::ScalarFuncSig::NEJson, "notEquals"}, {tipb::ScalarFuncSig::NEVectorFloat32, "notEquals"}, - //{tipb::ScalarFuncSig::NullEQInt, "cast"}, - //{tipb::ScalarFuncSig::NullEQReal, "cast"}, - //{tipb::ScalarFuncSig::NullEQString, "cast"}, - //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, - //{tipb::ScalarFuncSig::NullEQTime, "cast"}, - //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, - //{tipb::ScalarFuncSig::NullEQJson, "cast"}, + {tipb::ScalarFuncSig::NullEQInt, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQReal, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQString, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQDecimal, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQTime, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQDuration, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQJson, "tidbNullEQ"}, + {tipb::ScalarFuncSig::NullEQVectorFloat32, "tidbNullEQ"}, {tipb::ScalarFuncSig::PlusReal, "plus"}, {tipb::ScalarFuncSig::PlusDecimal, "plus"}, diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp new file mode 100644 index 00000000000..398936c93d9 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp @@ -0,0 +1,40 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::tests +{ +TEST(TiDBNullEQFuncTest, DagUtilsMappedToTidbNullEQ) +{ + { + tipb::Expr expr; + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.set_sig(tipb::ScalarFuncSig::NullEQInt); + + ASSERT_TRUE(isScalarFunctionExpr(expr)); + ASSERT_EQ(getFunctionName(expr), "tidbNullEQ"); + } + { + tipb::Expr expr; + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.set_sig(tipb::ScalarFuncSig::NullEQVectorFloat32); + + ASSERT_TRUE(isScalarFunctionExpr(expr)); + ASSERT_EQ(getFunctionName(expr), "tidbNullEQ"); + } +} + +} // namespace DB::tests diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index e57443f809d..e7c70e06f76 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -12,12 +12,176 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include namespace DB { +namespace ErrorCodes +{ +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ILLEGAL_COLUMN; +extern const int LOGICAL_ERROR; +} // namespace ErrorCodes + +class FunctionTiDBNullEQ : public IFunction +{ +public: + static constexpr auto name = "tidbNullEQ"; + + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + + void setCollator(const TiDB::TiDBCollatorPtr & collator_) override + { + collator = collator_; + equals_function->setCollator(collator_); + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2.", + getName(), + arguments.size()); + + /// `NULL <=> x` is always true/false (never NULL), even if `NULL` is represented as `Nothing`. + if (arguments[0]->onlyNull() || arguments[1]->onlyNull()) + return std::make_shared(); + + /// Use equals to validate that the input types are comparable. + /// Always return non-nullable UInt8 because `NULL <=> x` is always true/false (not NULL). + FunctionEquals().getReturnTypeImpl({removeNullable(arguments[0]), removeNullable(arguments[1])}); + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const auto & left = block.getByPosition(arguments[0]); + const auto & right = block.getByPosition(arguments[1]); + + ColumnPtr left_col = left.column; + ColumnPtr right_col = right.column; + + /// Need to materialize const columns since `removeNullable` does not unwrap `ColumnConst(ColumnNullable)`. + if (ColumnPtr converted = left_col->convertToFullColumnIfConst()) + left_col = converted; + if (ColumnPtr converted = right_col->convertToFullColumnIfConst()) + right_col = converted; + + const size_t rows = left_col->size(); + if (unlikely(right_col->size() != rows)) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Columns sizes are different in function {}: left {}, right {}.", + getName(), + rows, + right_col->size()); + + /// Fast path for always-NULL columns (Nullable(Nothing)). + /// `NULL <=> x` equals to `isNull(x)`; `NULL <=> NULL` is always 1. + if (left_col->onlyNull() || right_col->onlyNull()) + { + if (left_col->onlyNull() && right_col->onlyNull()) + { + block.getByPosition(result).column = ColumnUInt8::create(rows, 1); + return; + } + + const ColumnPtr & other_col = left_col->onlyNull() ? right_col : left_col; + auto res_col = ColumnUInt8::create(); + auto & res_data = res_col->getData(); + res_data.resize(rows); + if (other_col->isColumnNullable()) + { + const auto & other_nullmap = assert_cast(*other_col).getNullMapData(); + res_data.assign(other_nullmap.begin(), other_nullmap.end()); + } + else + { + std::fill(res_data.begin(), res_data.end(), 0); + } + block.getByPosition(result).column = std::move(res_col); + return; + } + + ColumnPtr left_nested_col = left_col; + const NullMap * left_nullmap = nullptr; + if (left_col->isColumnNullable()) + { + const auto & nullable_col = assert_cast(*left_col); + left_nested_col = nullable_col.getNestedColumnPtr(); + left_nullmap = &nullable_col.getNullMapData(); + } + + ColumnPtr right_nested_col = right_col; + const NullMap * right_nullmap = nullptr; + if (right_col->isColumnNullable()) + { + const auto & nullable_col = assert_cast(*right_col); + right_nested_col = nullable_col.getNestedColumnPtr(); + right_nullmap = &nullable_col.getNullMapData(); + } + + /// Execute `equals` on nested columns. + Block temp_block; + temp_block.insert({left_nested_col, removeNullable(left.type), "a"}); + temp_block.insert({right_nested_col, removeNullable(right.type), "b"}); + temp_block.insert({nullptr, std::make_shared(), "res"}); + DefaultExecutable(equals_function).execute(temp_block, {0, 1}, 2); + + ColumnPtr eq_col = temp_block.getByPosition(2).column; + if (ColumnPtr converted = eq_col->convertToFullColumnIfConst()) + eq_col = converted; + + const auto * eq_vec_col = checkAndGetColumn(eq_col.get()); + if (unlikely(eq_vec_col == nullptr)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Unexpected result column type {} for equals inside {}.", + eq_col->getName(), + getName()); + + auto res_col = ColumnUInt8::create(); + auto & res_data = res_col->getData(); + const auto & eq_data = eq_vec_col->getData(); + res_data.assign(eq_data.begin(), eq_data.end()); + + /// Adjust for NULL values: + /// - both NULL => 1 + /// - one NULL => 0 + /// - no NULL => equals result + if (left_nullmap != nullptr || right_nullmap != nullptr) + { + for (size_t i = 0; i < rows; ++i) + { + const bool left_is_null = left_nullmap != nullptr && (*left_nullmap)[i]; + const bool right_is_null = right_nullmap != nullptr && (*right_nullmap)[i]; + if (left_is_null && right_is_null) + res_data[i] = 1; + else if (left_is_null || right_is_null) + res_data[i] = 0; + } + } + + block.getByPosition(result).column = std::move(res_col); + } + +private: + TiDB::TiDBCollatorPtr collator = nullptr; + std::shared_ptr equals_function = std::make_shared(); +}; + void registerFunctionsComparison(FunctionFactory & factory) { factory.registerFunction(); @@ -31,6 +195,7 @@ void registerFunctionsComparison(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } template <> diff --git a/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp b/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp new file mode 100644 index 00000000000..acbc55438c4 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp @@ -0,0 +1,93 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::tests +{ +class TestTiDBNullEQ : public DB::tests::FunctionTest +{ +}; + +TEST_F(TestTiDBNullEQ, Basic) +try +{ + auto a = createColumn({1, 2, 2}); + auto b = createColumn({1, 3, 2}); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({1, 0, 1}), res); +} +CATCH + +TEST_F(TestTiDBNullEQ, NullableInputs) +try +{ + auto a = createColumn>({1, std::nullopt, std::nullopt, 2}); + auto b = createColumn>({1, std::nullopt, 3, std::nullopt}); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({1, 1, 0, 0}), res); +} +CATCH + +TEST_F(TestTiDBNullEQ, OnlyNullColumns) +try +{ + auto a = createOnlyNullColumn(5); + auto b = createOnlyNullColumn(5); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({1, 1, 1, 1, 1}), res); +} +CATCH + +TEST_F(TestTiDBNullEQ, OneSideOnlyNull) +try +{ + auto a = createOnlyNullColumn(3); + auto b = createColumn>({1, std::nullopt, 3}); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({0, 1, 0}), res); +} +CATCH + +TEST_F(TestTiDBNullEQ, ConstOnlyNull) +try +{ + auto a = createOnlyNullColumnConst(4); + auto b = createConstColumn>(4, 1); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createConstColumn(4, 0), res); +} +CATCH + +TEST_F(TestTiDBNullEQ, CollatorIsForwardedToEquals) +try +{ + auto a = createColumn>({"a", "A", std::nullopt}); + auto b = createColumn>({"A", "a", std::nullopt}); + + auto ci_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_GENERAL_CI); + ASSERT_COLUMN_EQ(createColumn({1, 1, 1}), executeFunction("tidbNullEQ", {a, b}, ci_collator)); + + auto bin_collator = TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::BINARY); + ASSERT_COLUMN_EQ(createColumn({0, 0, 1}), executeFunction("tidbNullEQ", {a, b}, bin_collator)); +} +CATCH + +} // namespace DB::tests From 3367e53d72c9e316219560d9090136568080765b Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 14:47:39 +0800 Subject: [PATCH 2/9] refine Signed-off-by: xufei --- dbms/src/Functions/FunctionsComparison.cpp | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index e7c70e06f76..4f8c66f14fd 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -141,26 +141,29 @@ class FunctionTiDBNullEQ : public IFunction DefaultExecutable(equals_function).execute(temp_block, {0, 1}, 2); ColumnPtr eq_col = temp_block.getByPosition(2).column; + if (left_nullmap == nullptr && right_nullmap == nullptr) + { + block.getByPosition(result).column = std::move(eq_col); + return; + } + if (ColumnPtr converted = eq_col->convertToFullColumnIfConst()) eq_col = converted; - const auto * eq_vec_col = checkAndGetColumn(eq_col.get()); + /// Adjust for NULL values: + /// - both NULL => 1 + /// - one NULL => 0 + /// - no NULL => equals result + auto eq_mutable = (*std::move(eq_col)).mutate(); + auto * eq_vec_col = typeid_cast(eq_mutable.get()); if (unlikely(eq_vec_col == nullptr)) throw Exception( ErrorCodes::LOGICAL_ERROR, "Unexpected result column type {} for equals inside {}.", - eq_col->getName(), + eq_mutable->getName(), getName()); - auto res_col = ColumnUInt8::create(); - auto & res_data = res_col->getData(); - const auto & eq_data = eq_vec_col->getData(); - res_data.assign(eq_data.begin(), eq_data.end()); - - /// Adjust for NULL values: - /// - both NULL => 1 - /// - one NULL => 0 - /// - no NULL => equals result + auto & res_data = eq_vec_col->getData(); if (left_nullmap != nullptr || right_nullmap != nullptr) { for (size_t i = 0; i < rows; ++i) @@ -174,7 +177,7 @@ class FunctionTiDBNullEQ : public IFunction } } - block.getByPosition(result).column = std::move(res_col); + block.getByPosition(result).column = std::move(eq_mutable); } private: From 0c99d479af99f7adc5c7dc969d145d6ced0eebad Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 16:00:44 +0800 Subject: [PATCH 3/9] save work Signed-off-by: xufei --- dbms/src/Functions/FunctionsComparison.cpp | 46 +++--- .../Functions/tests/gtest_tidb_null_eq.cpp | 31 ++++ .../DeltaMerge/FilterParser/FilterParser.cpp | 21 ++- .../tests/gtest_dm_filter_parser_nulleq.cpp | 149 ++++++++++++++++++ 4 files changed, 222 insertions(+), 25 deletions(-) create mode 100644 dbms/src/Storages/DeltaMerge/tests/gtest_dm_filter_parser_nulleq.cpp diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index 4f8c66f14fd..99a3fb38954 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -73,12 +74,6 @@ class FunctionTiDBNullEQ : public IFunction ColumnPtr left_col = left.column; ColumnPtr right_col = right.column; - /// Need to materialize const columns since `removeNullable` does not unwrap `ColumnConst(ColumnNullable)`. - if (ColumnPtr converted = left_col->convertToFullColumnIfConst()) - left_col = converted; - if (ColumnPtr converted = right_col->convertToFullColumnIfConst()) - right_col = converted; - const size_t rows = left_col->size(); if (unlikely(right_col->size() != rows)) throw Exception( @@ -115,23 +110,38 @@ class FunctionTiDBNullEQ : public IFunction return; } + auto unwrapNullableColumn = [rows](const ColumnPtr & col, ColumnPtr & nested_col, const NullMap *& nullmap) { + nested_col = col; + nullmap = nullptr; + + if (const auto * const_col = typeid_cast(col.get())) + { + const auto & data_col = const_col->getDataColumn(); + if (data_col.isColumnNullable()) + { + /// `ColumnConst(ColumnNullable(NULL))` is handled by the `onlyNull()` fast path above. + /// If we reach here, the nullable constant must be non-NULL, so there is no nullmap to apply. + const auto & nullable_col = assert_cast(data_col); + nested_col = ColumnConst::create(nullable_col.getNestedColumnPtr(), rows); + } + return; + } + + if (col->isColumnNullable()) + { + const auto & nullable_col = assert_cast(*col); + nested_col = nullable_col.getNestedColumnPtr(); + nullmap = &nullable_col.getNullMapData(); + } + }; + ColumnPtr left_nested_col = left_col; const NullMap * left_nullmap = nullptr; - if (left_col->isColumnNullable()) - { - const auto & nullable_col = assert_cast(*left_col); - left_nested_col = nullable_col.getNestedColumnPtr(); - left_nullmap = &nullable_col.getNullMapData(); - } + unwrapNullableColumn(left_col, left_nested_col, left_nullmap); ColumnPtr right_nested_col = right_col; const NullMap * right_nullmap = nullptr; - if (right_col->isColumnNullable()) - { - const auto & nullable_col = assert_cast(*right_col); - right_nested_col = nullable_col.getNestedColumnPtr(); - right_nullmap = &nullable_col.getNullMapData(); - } + unwrapNullableColumn(right_col, right_nested_col, right_nullmap); /// Execute `equals` on nested columns. Block temp_block; diff --git a/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp b/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp index acbc55438c4..a9c222c36ed 100644 --- a/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_null_eq.cpp @@ -76,6 +76,37 @@ try } CATCH +TEST_F(TestTiDBNullEQ, ConstNullableNonNull) +try +{ + auto a = createConstColumn>(4, 1); + auto b = createColumn>({1, std::nullopt, 2, 1}); + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 1}), res); + + auto res2 = executeFunction("tidbNullEQ", b, a); + ASSERT_EQ(res2.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({1, 0, 0, 1}), res2); +} +CATCH + +TEST_F(TestTiDBNullEQ, ConstNullableNull) +try +{ + auto a = createConstColumn>(4, std::nullopt); + auto b = createColumn>({1, std::nullopt, 2, std::nullopt}); + + auto res = executeFunction("tidbNullEQ", a, b); + ASSERT_EQ(res.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({0, 1, 0, 1}), res); + + auto res2 = executeFunction("tidbNullEQ", b, a); + ASSERT_EQ(res2.type->getName(), "UInt8"); + ASSERT_COLUMN_EQ(createColumn({0, 1, 0, 1}), res2); +} +CATCH + TEST_F(TestTiDBNullEQ, CollatorIsForwardedToEquals) try { diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 7b635dd0dcd..0f8703c001a 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -212,6 +212,12 @@ inline RSOperatorPtr parseTiCompareExpr( // switch (filter_type) { case FilterParser::RSFilterType::Equal: + if ((expr.sig() == tipb::ScalarFuncSig::NullEQInt || expr.sig() == tipb::ScalarFuncSig::NullEQReal + || expr.sig() == tipb::ScalarFuncSig::NullEQString || expr.sig() == tipb::ScalarFuncSig::NullEQDecimal + || expr.sig() == tipb::ScalarFuncSig::NullEQTime || expr.sig() == tipb::ScalarFuncSig::NullEQDuration + || expr.sig() == tipb::ScalarFuncSig::NullEQJson || expr.sig() == tipb::ScalarFuncSig::NullEQVectorFloat32) + && values[0].isNull()) + return createIsNull(attr); return createEqual(attr, values[0]); case FilterParser::RSFilterType::NotEqual: return createNotEqual(attr, values[0]); @@ -579,13 +585,14 @@ std::unordered_map FilterParser {tipb::ScalarFuncSig::NEDuration, FilterParser::RSFilterType::NotEqual}, {tipb::ScalarFuncSig::NEJson, FilterParser::RSFilterType::NotEqual}, - //{tipb::ScalarFuncSig::NullEQInt, "cast"}, - //{tipb::ScalarFuncSig::NullEQReal, "cast"}, - //{tipb::ScalarFuncSig::NullEQString, "cast"}, - //{tipb::ScalarFuncSig::NullEQDecimal, "cast"}, - //{tipb::ScalarFuncSig::NullEQTime, "cast"}, - //{tipb::ScalarFuncSig::NullEQDuration, "cast"}, - //{tipb::ScalarFuncSig::NullEQJson, "cast"}, + {tipb::ScalarFuncSig::NullEQInt, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQReal, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQString, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQDecimal, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQTime, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQDuration, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQJson, FilterParser::RSFilterType::Equal}, + {tipb::ScalarFuncSig::NullEQVectorFloat32, FilterParser::RSFilterType::Equal}, // {tipb::ScalarFuncSig::PlusReal, "plus"}, // {tipb::ScalarFuncSig::PlusDecimal, "plus"}, diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_filter_parser_nulleq.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_filter_parser_nulleq.cpp new file mode 100644 index 00000000000..e904531464a --- /dev/null +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_filter_parser_nulleq.cpp @@ -0,0 +1,149 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::DM::tests +{ + +namespace +{ +tipb::Expr buildColumnRefExpr(Int64 column_index, Int32 field_type) +{ + tipb::Expr col; + col.set_tp(tipb::ExprType::ColumnRef); + { + WriteBufferFromOwnString ss; + encodeDAGInt64(column_index, ss); + col.set_val(ss.releaseStr()); + } + auto * field_type_pb = col.mutable_field_type(); + field_type_pb->set_tp(field_type); + field_type_pb->set_flag(0); + return col; +} + +tipb::Expr buildInt64LiteralExpr(Int64 value) +{ + tipb::Expr lit; + lit.set_tp(tipb::ExprType::Int64); + { + WriteBufferFromOwnString ss; + encodeDAGInt64(value, ss); + lit.set_val(ss.releaseStr()); + } + return lit; +} + +tipb::Expr buildNullLiteralExpr() +{ + tipb::Expr lit; + lit.set_tp(tipb::ExprType::Null); + return lit; +} + +String parseToDebugString(Context & context, const tipb::Expr & filter_expr) +{ + google::protobuf::RepeatedPtrField filters; + filters.Add()->CopyFrom(filter_expr); + + const google::protobuf::RepeatedPtrField pushed_down_filters{}; + + TiDB::ColumnInfo col; + col.id = 1; + TiDB::ColumnInfos column_infos = {col}; + + const ColumnDefines columns_to_read = {ColumnDefine{1, "a", std::make_shared()}}; + auto create_attr_by_column_id = [&columns_to_read](ColumnID column_id) -> Attr { + auto iter + = std::find_if(columns_to_read.begin(), columns_to_read.end(), [column_id](const ColumnDefine & d) -> bool { + return d.id == column_id; + }); + if (iter != columns_to_read.end()) + return Attr{.col_name = iter->name, .col_id = iter->id, .type = iter->type}; + return Attr{.col_name = "", .col_id = column_id, .type = DataTypePtr{}}; + }; + + const auto ann_query_info = tipb::ANNQueryInfo{}; + auto dag_query = std::make_unique( + filters, + ann_query_info, + pushed_down_filters, + column_infos, + std::vector{}, + 0, + context.getTimezoneInfo()); + + const auto op + = DB::DM::FilterParser::parseDAGQuery(*dag_query, column_infos, create_attr_by_column_id, Logger::get()); + return op->toDebugString(); +} +} // namespace + +TEST(DMFilterParserTest, ParseNullEQ) +try +{ + auto context = DMTestEnv::getContext(); + + { + // a <=> 1 -> equal(a, 1) + tipb::Expr expr; + expr.set_sig(tipb::ScalarFuncSig::NullEQInt); + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.add_children()->CopyFrom(buildColumnRefExpr(/*column_index*/ 0, TiDB::TypeLongLong)); + expr.add_children()->CopyFrom(buildInt64LiteralExpr(1)); + EXPECT_EQ(parseToDebugString(*context, expr), R"raw({"op":"equal","col":"a","value":"1"})raw"); + } + + { + // a <=> NULL -> isnull(a) + tipb::Expr expr; + expr.set_sig(tipb::ScalarFuncSig::NullEQInt); + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.add_children()->CopyFrom(buildColumnRefExpr(/*column_index*/ 0, TiDB::TypeLongLong)); + expr.add_children()->CopyFrom(buildNullLiteralExpr()); + EXPECT_EQ(parseToDebugString(*context, expr), R"raw({"op":"isnull","col":"a"})raw"); + } + + { + // NULL <=> a -> isnull(a) + tipb::Expr expr; + expr.set_sig(tipb::ScalarFuncSig::NullEQInt); + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.add_children()->CopyFrom(buildNullLiteralExpr()); + expr.add_children()->CopyFrom(buildColumnRefExpr(/*column_index*/ 0, TiDB::TypeLongLong)); + EXPECT_EQ(parseToDebugString(*context, expr), R"raw({"op":"isnull","col":"a"})raw"); + } + + { + // 1 <=> a -> equal(a, 1) + tipb::Expr expr; + expr.set_sig(tipb::ScalarFuncSig::NullEQInt); + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.add_children()->CopyFrom(buildInt64LiteralExpr(1)); + expr.add_children()->CopyFrom(buildColumnRefExpr(/*column_index*/ 0, TiDB::TypeLongLong)); + EXPECT_EQ(parseToDebugString(*context, expr), R"raw({"op":"equal","col":"a","value":"1"})raw"); + } +} +CATCH + +} // namespace DB::DM::tests From 732970c282ae662e52f4c6f2a54d4ecb3d5244d1 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 16:03:38 +0800 Subject: [PATCH 4/9] save work Signed-off-by: xufei --- dbms/src/Flash/Coprocessor/DAGUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 37355ba2109..e9ca7f0d780 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -222,7 +222,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::NullEQDecimal, "tidbNullEQ"}, {tipb::ScalarFuncSig::NullEQTime, "tidbNullEQ"}, {tipb::ScalarFuncSig::NullEQDuration, "tidbNullEQ"}, - {tipb::ScalarFuncSig::NullEQJson, "tidbNullEQ"}, + //{tipb::ScalarFuncSig::NullEQJson, "tidbNullEQ"}, {tipb::ScalarFuncSig::NullEQVectorFloat32, "tidbNullEQ"}, {tipb::ScalarFuncSig::PlusReal, "plus"}, From 268c2b1cfc74cc1f5827c2f2b0ff9eadcee4d2ec Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 16:26:26 +0800 Subject: [PATCH 5/9] save work Signed-off-by: xufei --- dbms/src/Functions/FunctionsComparison.cpp | 37 ++++++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index 99a3fb38954..2b6829363aa 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -174,16 +174,39 @@ class FunctionTiDBNullEQ : public IFunction getName()); auto & res_data = eq_vec_col->getData(); - if (left_nullmap != nullptr || right_nullmap != nullptr) + if (left_nullmap != nullptr && right_nullmap != nullptr) { + const auto & left_data = *left_nullmap; + const auto & right_data = *right_nullmap; for (size_t i = 0; i < rows; ++i) { - const bool left_is_null = left_nullmap != nullptr && (*left_nullmap)[i]; - const bool right_is_null = right_nullmap != nullptr && (*right_nullmap)[i]; - if (left_is_null && right_is_null) - res_data[i] = 1; - else if (left_is_null || right_is_null) - res_data[i] = 0; + const UInt8 left_is_null = left_data[i] != 0; + const UInt8 right_is_null = right_data[i] != 0; + + const UInt8 any_null = left_is_null | right_is_null; + const UInt8 both_null = left_is_null & right_is_null; + + /// Keep equals result when `any_null == 0`, otherwise override it to 0. + /// Finally, override to 1 when `both_null == 1`. + res_data[i] = (res_data[i] & static_cast(!any_null)) | both_null; + } + } + else if (left_nullmap != nullptr) + { + const auto & left_data = *left_nullmap; + for (size_t i = 0; i < rows; ++i) + { + const UInt8 left_is_null = left_data[i] != 0; + res_data[i] &= static_cast(!left_is_null); + } + } + else if (right_nullmap != nullptr) + { + const auto & right_data = *right_nullmap; + for (size_t i = 0; i < rows; ++i) + { + const UInt8 right_is_null = right_data[i] != 0; + res_data[i] &= static_cast(!right_is_null); } } From 04afc2f5123184c7d0b0609432e27ea19b074ea3 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 17:12:07 +0800 Subject: [PATCH 6/9] Functions: normalize tidbNullEQ output --- dbms/src/Functions/FunctionsComparison.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index 2b6829363aa..9e4497a1ef6 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -94,19 +94,18 @@ class FunctionTiDBNullEQ : public IFunction } const ColumnPtr & other_col = left_col->onlyNull() ? right_col : left_col; - auto res_col = ColumnUInt8::create(); - auto & res_data = res_col->getData(); - res_data.resize(rows); if (other_col->isColumnNullable()) { const auto & other_nullmap = assert_cast(*other_col).getNullMapData(); + auto res_col = ColumnUInt8::create(); + auto & res_data = res_col->getData(); res_data.assign(other_nullmap.begin(), other_nullmap.end()); + block.getByPosition(result).column = std::move(res_col); } else { - std::fill(res_data.begin(), res_data.end(), 0); + block.getByPosition(result).column = ColumnUInt8::create(rows, 0); } - block.getByPosition(result).column = std::move(res_col); return; } @@ -188,7 +187,8 @@ class FunctionTiDBNullEQ : public IFunction /// Keep equals result when `any_null == 0`, otherwise override it to 0. /// Finally, override to 1 when `both_null == 1`. - res_data[i] = (res_data[i] & static_cast(!any_null)) | both_null; + const auto eq = static_cast(res_data[i] != 0); + res_data[i] = (eq & static_cast(!any_null)) | both_null; } } else if (left_nullmap != nullptr) @@ -197,7 +197,8 @@ class FunctionTiDBNullEQ : public IFunction for (size_t i = 0; i < rows; ++i) { const UInt8 left_is_null = left_data[i] != 0; - res_data[i] &= static_cast(!left_is_null); + const auto eq = static_cast(res_data[i] != 0); + res_data[i] = eq & static_cast(!left_is_null); } } else if (right_nullmap != nullptr) @@ -206,7 +207,8 @@ class FunctionTiDBNullEQ : public IFunction for (size_t i = 0; i < rows; ++i) { const UInt8 right_is_null = right_data[i] != 0; - res_data[i] &= static_cast(!right_is_null); + const auto eq = static_cast(res_data[i] != 0); + res_data[i] = eq & static_cast(!right_is_null); } } From e5cf3a0a2f7dbb897282fb454010b8ae2d047ab5 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 17:42:01 +0800 Subject: [PATCH 7/9] save work Signed-off-by: xufei --- dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp index 0f8703c001a..92fb1c5bdd9 100644 --- a/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp +++ b/dbms/src/Storages/DeltaMerge/FilterParser/FilterParser.cpp @@ -215,7 +215,7 @@ inline RSOperatorPtr parseTiCompareExpr( // if ((expr.sig() == tipb::ScalarFuncSig::NullEQInt || expr.sig() == tipb::ScalarFuncSig::NullEQReal || expr.sig() == tipb::ScalarFuncSig::NullEQString || expr.sig() == tipb::ScalarFuncSig::NullEQDecimal || expr.sig() == tipb::ScalarFuncSig::NullEQTime || expr.sig() == tipb::ScalarFuncSig::NullEQDuration - || expr.sig() == tipb::ScalarFuncSig::NullEQJson || expr.sig() == tipb::ScalarFuncSig::NullEQVectorFloat32) + || expr.sig() == tipb::ScalarFuncSig::NullEQVectorFloat32) && values[0].isNull()) return createIsNull(attr); return createEqual(attr, values[0]); From 924524e5fc8518d6d6f730a933e0c40262e76f11 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 20:27:43 +0800 Subject: [PATCH 8/9] [Cross:tests] Cover more tidbNullEQ sigs --- .../tests/gtest_tidb_null_eq_func.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp b/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp index 398936c93d9..6cf0f67208d 100644 --- a/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp +++ b/dbms/src/Flash/Coprocessor/tests/gtest_tidb_null_eq_func.cpp @@ -27,6 +27,22 @@ TEST(TiDBNullEQFuncTest, DagUtilsMappedToTidbNullEQ) ASSERT_TRUE(isScalarFunctionExpr(expr)); ASSERT_EQ(getFunctionName(expr), "tidbNullEQ"); } + { + tipb::Expr expr; + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.set_sig(tipb::ScalarFuncSig::NullEQString); + + ASSERT_TRUE(isScalarFunctionExpr(expr)); + ASSERT_EQ(getFunctionName(expr), "tidbNullEQ"); + } + { + tipb::Expr expr; + expr.set_tp(tipb::ExprType::ScalarFunc); + expr.set_sig(tipb::ScalarFuncSig::NullEQDecimal); + + ASSERT_TRUE(isScalarFunctionExpr(expr)); + ASSERT_EQ(getFunctionName(expr), "tidbNullEQ"); + } { tipb::Expr expr; expr.set_tp(tipb::ExprType::ScalarFunc); From a3936977cdca5cfed9b0c6fe7addd5d8da8aecf7 Mon Sep 17 00:00:00 2001 From: xufei Date: Sat, 14 Feb 2026 21:03:45 +0800 Subject: [PATCH 9/9] fix format Signed-off-by: xufei --- dbms/src/Functions/FunctionsComparison.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/FunctionsComparison.cpp b/dbms/src/Functions/FunctionsComparison.cpp index 9e4497a1ef6..09350a4a52a 100644 --- a/dbms/src/Functions/FunctionsComparison.cpp +++ b/dbms/src/Functions/FunctionsComparison.cpp @@ -109,7 +109,7 @@ class FunctionTiDBNullEQ : public IFunction return; } - auto unwrapNullableColumn = [rows](const ColumnPtr & col, ColumnPtr & nested_col, const NullMap *& nullmap) { + auto unwrap_nullable_column = [rows](const ColumnPtr & col, ColumnPtr & nested_col, const NullMap *& nullmap) { nested_col = col; nullmap = nullptr; @@ -136,11 +136,11 @@ class FunctionTiDBNullEQ : public IFunction ColumnPtr left_nested_col = left_col; const NullMap * left_nullmap = nullptr; - unwrapNullableColumn(left_col, left_nested_col, left_nullmap); + unwrap_nullable_column(left_col, left_nested_col, left_nullmap); ColumnPtr right_nested_col = right_col; const NullMap * right_nullmap = nullptr; - unwrapNullableColumn(right_col, right_nested_col, right_nullmap); + unwrap_nullable_column(right_col, right_nested_col, right_nullmap); /// Execute `equals` on nested columns. Block temp_block;