From d3d1a363f1ebf8d957de610bf069ccf6107be4e7 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 31 Oct 2023 15:50:53 +0800 Subject: [PATCH 1/5] [refactor](function) imporve compoundPred optimization work with children is nullable --- be/src/vec/exprs/vcompound_pred.h | 231 +++++++++++++++++++++--------- 1 file changed, 165 insertions(+), 66 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 94994f7d941e68..239e70ba7df06f 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -23,6 +23,7 @@ #include "vec/columns/column.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/data_types/data_type_number.h" #include "vec/exprs/vectorized_fn_call.h" #include "vec/exprs/vexpr.h" @@ -52,86 +53,190 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } Status execute(VExprContext* context, Block* block, int* result_column_id) override { - if (children().size() == 1 || !_all_child_is_compound_and_not_const() || - _children[0]->is_nullable() || _children[1]->is_nullable()) { - // TODO: - // When the child is nullable, make the optimization also take effect, and the processing of this piece may be more complicated - // https://dev.mysql.com/doc/refman/8.0/en/logical-operators.html + if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { return VectorizedFnCall::execute(context, block, result_column_id); } int lhs_id = -1; int rhs_id = -1; RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id)); - ColumnPtr lhs_column = - block->get_by_position(lhs_id).column->convert_to_full_column_if_const(); - + //should deal with const column ? + ColumnPtr lhs_column = block->get_by_position(lhs_id).column; size_t size = lhs_column->size(); - uint8* __restrict data = _get_raw_data(lhs_column); - int filted = simd::count_zero_num((int8_t*)data, size); - bool full = filted == 0; - bool empty = filted == size; + bool lhs_is_nullable = lhs_column->is_nullable(); + auto [lhs_data_column, lhs_null_map] = + _get_raw_data_and_null_map(lhs_column, lhs_is_nullable); + int filted = simd::count_zero_num((int8_t*)lhs_data_column, size); + bool lhs_all_true = (filted == 0); + bool lhs_all_false = (filted == size); + + bool lhs_all_is_not_null = false; + if (lhs_is_nullable) { + filted = simd::count_zero_num((int8_t*)lhs_null_map, size); + lhs_all_is_not_null = (filted == size); + } ColumnPtr rhs_column = nullptr; - uint8* __restrict data_rhs = nullptr; - bool full_rhs = false; - bool empty_rhs = false; + uint8* __restrict rhs_data_column = nullptr; + uint8* __restrict rhs_null_map = nullptr; + bool rhs_is_nullable = false; + bool rhs_all_true = false; + bool rhs_all_false = false; + bool rhs_all_is_not_null = false; auto get_rhs_colum = [&]() { if (rhs_id == -1) { RETURN_IF_ERROR(_children[1]->execute(context, block, &rhs_id)); - rhs_column = - block->get_by_position(rhs_id).column->convert_to_full_column_if_const(); - data_rhs = _get_raw_data(rhs_column); - int filted = simd::count_zero_num((int8_t*)data_rhs, size); - full_rhs = filted == 0; - empty_rhs = filted == size; + rhs_column = block->get_by_position(rhs_id).column; + rhs_is_nullable = rhs_column->is_nullable(); + auto rhs_nullable_column = _get_raw_data_and_null_map(rhs_column, rhs_is_nullable); + rhs_data_column = rhs_nullable_column.first; + rhs_null_map = rhs_nullable_column.second; + int filted = simd::count_zero_num((int8_t*)rhs_data_column, size); + rhs_all_true = (filted == 0); + rhs_all_false = (filted == size); + if (rhs_is_nullable) { + filted = simd::count_zero_num((int8_t*)rhs_null_map, size); + rhs_all_is_not_null = (filted == size); + } } return Status::OK(); }; - + // false and NULL ----> 0 + // true and NULL ----> NULL if (_op == TExprOpcode::COMPOUND_AND) { - if (empty) { - // empty and any = empty, return lhs + //not null column: all data is false + //nullable column: null map all is not null + if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { + // false and any = false, return lhs *result_column_id = lhs_id; } else { RETURN_IF_ERROR(get_rhs_colum()); - if (full) { - // full and any = any, return rhs + if ((lhs_all_true && !lhs_is_nullable) || //not null column + (lhs_all_true && lhs_all_is_not_null)) { //nullable column + // true and any = any, return rhs *result_column_id = rhs_id; - } else if (empty_rhs) { - // any and empty = empty, return rhs + } else if ((rhs_all_false && !rhs_is_nullable) || + (rhs_all_false && rhs_all_is_not_null)) { + // any and false = false, return rhs *result_column_id = rhs_id; - } else if (full_rhs) { - // any and full = any, return lhs + } else if ((rhs_all_true && !rhs_is_nullable) || + (rhs_all_true && rhs_all_is_not_null)) { + // any and true = any, return lhs *result_column_id = lhs_id; } else { - *result_column_id = lhs_id; - for (size_t i = 0; i < size; i++) { - data[i] &= data_rhs[i]; + bool res_nullable = (lhs_is_nullable || rhs_is_nullable); + if (!res_nullable) { + *result_column_id = lhs_id; + for (size_t i = 0; i < size; i++) { + lhs_data_column[i] &= rhs_data_column[i]; + } + } else { + auto col_res = ColumnUInt8::create(size); + auto col_nulls = ColumnUInt8::create(size); + auto* __restrict res_datas = + assert_cast(col_res)->get_data().data(); + auto* __restrict res_nulls = + assert_cast(col_nulls)->get_data().data(); + ColumnPtr temp_null_map = nullptr; + if ((lhs_is_nullable && !rhs_is_nullable) || + (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable + if (lhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + lhs_null_map = assert_cast( + temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + if (rhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + rhs_null_map = assert_cast( + temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + } + + for (size_t i = 0; i < size; ++i) { + res_nulls[i] = + (lhs_null_map[i] & rhs_null_map[i]) | + (rhs_null_map[i] & (lhs_null_map[i] ^ lhs_data_column[i])) | + (lhs_null_map[i] & (rhs_null_map[i] ^ rhs_data_column[i])); + res_datas[i] = lhs_data_column[i] & rhs_data_column[i]; + } + auto result_column = + ColumnNullable::create(std::move(col_res), std::move(col_nulls)); + auto result_type = make_nullable(std::make_shared()); + *result_column_id = block->columns(); + block->insert({std::move(result_column), result_type, _expr_name}); } } } } else if (_op == TExprOpcode::COMPOUND_OR) { - if (full) { - // full or any = full, return lhs + // true or NULL ----> 1 + // false or NULL ----> NULL + if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { + // true or any = true, return lhs *result_column_id = lhs_id; } else { RETURN_IF_ERROR(get_rhs_colum()); - if (empty) { - // empty or any = any, return rhs + if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { + // false or any = any, return rhs *result_column_id = rhs_id; - } else if (full_rhs) { - // any or full = full, return rhs + } else if ((rhs_all_true && !rhs_is_nullable) || + (rhs_all_true && rhs_all_is_not_null)) { + // any or true = true, return rhs *result_column_id = rhs_id; - } else if (empty_rhs) { - // any or empty = any, return lhs + } else if ((rhs_all_false && !rhs_is_nullable) || + (rhs_all_false && rhs_all_is_not_null)) { + // any or false = any, return lhs *result_column_id = lhs_id; } else { - *result_column_id = lhs_id; - for (size_t i = 0; i < size; i++) { - data[i] |= data_rhs[i]; + bool res_nullable = (lhs_is_nullable || rhs_is_nullable); + if (!res_nullable) { + *result_column_id = lhs_id; + for (size_t i = 0; i < size; i++) { + lhs_data_column[i] |= rhs_data_column[i]; + } + } else { + auto col_res = ColumnUInt8::create(size); + auto col_nulls = ColumnUInt8::create(size); + auto* __restrict res_datas = + assert_cast(col_res)->get_data().data(); + auto* __restrict res_nulls = + assert_cast(col_nulls)->get_data().data(); + ColumnPtr temp_null_map = nullptr; + if ((lhs_is_nullable && !rhs_is_nullable) || + (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable + if (lhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + lhs_null_map = assert_cast( + temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + if (rhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + rhs_null_map = assert_cast( + temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + } + + for (size_t i = 0; i < size; ++i) { + res_nulls[i] = + (lhs_null_map[i] & rhs_null_map[i]) | + (rhs_null_map[i] & (rhs_null_map[i] ^ lhs_data_column[i])) | + (lhs_null_map[i] & (lhs_null_map[i] ^ rhs_data_column[i])); + res_datas[i] = lhs_data_column[i] | rhs_data_column[i]; + } + auto result_column = + ColumnNullable::create(std::move(col_res), std::move(col_nulls)); + auto result_type = make_nullable(std::make_shared()); + *result_column_id = block->columns(); + block->insert({std::move(result_column), result_type, _expr_name}); } } } @@ -155,29 +260,23 @@ class VCompoundPred : public VectorizedFnCall { return true; } - uint8* _get_raw_data(ColumnPtr column) const { - if (column->is_nullable()) { - return assert_cast( - assert_cast(column->assume_mutable().get()) - ->get_nested_column_ptr() - .get()) - ->get_data() - .data(); - } else { - return assert_cast(column->assume_mutable().get())->get_data().data(); - } - } - - uint8* _get_null_map(ColumnPtr column) const { - if (column->is_nullable()) { - return assert_cast( - assert_cast(column->assume_mutable().get()) - ->get_null_map_column_ptr() - .get()) - ->get_data() - .data(); + std::pair _get_raw_data_and_null_map(ColumnPtr column, + bool nullable_column) const { + if (nullable_column) { + auto* nullable_column = assert_cast(column->assume_mutable().get()); + auto* data_column = + assert_cast(nullable_column->get_nested_column_ptr().get()) + ->get_data() + .data(); + auto* null_map = + assert_cast(nullable_column->get_null_map_column_ptr().get()) + ->get_data() + .data(); + return std::make_pair(data_column, null_map); } else { - return nullptr; + auto* data_column = + assert_cast(column->assume_mutable().get())->get_data().data(); + return std::make_pair(data_column, nullptr); } } From 61dcafafee8706dff097fbb0b47cb1dd152bdbf7 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 31 Oct 2023 18:19:33 +0800 Subject: [PATCH 2/5] update review --- be/src/vec/exprs/vcompound_pred.h | 192 ++++++++++++++++-------------- 1 file changed, 104 insertions(+), 88 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 239e70ba7df06f..2326accdff9ece 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -19,6 +19,7 @@ #include #include "common/status.h" +#include "gutil/integral_types.h" #include "util/simd/bits.h" #include "vec/columns/column.h" #include "vec/columns/columns_number.h" @@ -83,6 +84,7 @@ class VCompoundPred : public VectorizedFnCall { bool rhs_all_true = false; bool rhs_all_false = false; bool rhs_all_is_not_null = false; + bool result_is_nullable = _data_type->is_nullable(); auto get_rhs_colum = [&]() { if (rhs_id == -1) { @@ -102,74 +104,100 @@ class VCompoundPred : public VectorizedFnCall { } return Status::OK(); }; + + auto create_nullable_result_column = [&](ColumnPtr nested_column) { + auto result_column = + ColumnNullable::create(nested_column, ColumnUInt8::create(size, 0)); + *result_column_id = block->columns(); + block->insert({std::move(result_column), _data_type, _expr_name}); + }; + + auto vector_vector_null = [&]() { + auto col_res = ColumnUInt8::create(size); + auto col_nulls = ColumnUInt8::create(size); + auto* __restrict res_datas = assert_cast(col_res)->get_data().data(); + auto* __restrict res_nulls = assert_cast(col_nulls)->get_data().data(); + ColumnPtr temp_null_map = nullptr; + if ((lhs_is_nullable && !rhs_is_nullable) || + (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable + if (lhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + lhs_null_map = assert_cast(temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + if (rhs_null_map == nullptr) { + temp_null_map = ColumnUInt8::create(size, 0); + rhs_null_map = assert_cast(temp_null_map->assume_mutable().get()) + ->get_data() + .data(); + } + } + if constexpr (is_and_op) { + for (size_t i = 0; i < size; ++i) { + res_nulls[i] = apply_and_null(lhs_data_column[i], lhs_null_map[i], + rhs_data_column[i], rhs_null_map[i]); + res_datas[i] = lhs_data_column[i] & rhs_data_column[i]; + } + } else { + for (size_t i = 0; i < size; ++i) { + res_nulls[i] = apply_or_null(lhs_data_column[i], lhs_null_map[i], + rhs_data_column[i], rhs_null_map[i]); + res_datas[i] = lhs_data_column[i] | rhs_data_column[i]; + } + } + auto result_column = ColumnNullable::create(std::move(col_res), std::move(col_nulls)); + *result_column_id = block->columns(); + block->insert({std::move(result_column), _data_type, _expr_name}); + }; + // false and NULL ----> 0 // true and NULL ----> NULL if (_op == TExprOpcode::COMPOUND_AND) { - //not null column: all data is false - //nullable column: null map all is not null + //1. not null column: all data is false + //2. nullable column: null map all is not null if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false and any = false, return lhs - *result_column_id = lhs_id; + if (result_is_nullable && !lhs_is_nullable) { + create_nullable_result_column(lhs_column); + } else { + *result_column_id = lhs_id; + } } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_true && !lhs_is_nullable) || //not null column (lhs_all_true && lhs_all_is_not_null)) { //nullable column // true and any = any, return rhs - *result_column_id = rhs_id; + if (result_is_nullable && !rhs_is_nullable) { + create_nullable_result_column(rhs_column); + } else { + *result_column_id = rhs_id; + } } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any and false = false, return rhs - *result_column_id = rhs_id; + if (result_is_nullable && !rhs_is_nullable) { + create_nullable_result_column(rhs_column); + } else { + *result_column_id = rhs_id; + } } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any and true = any, return lhs - *result_column_id = lhs_id; + if (result_is_nullable && !lhs_is_nullable) { + create_nullable_result_column(lhs_column); + } else { + *result_column_id = lhs_id; + } } else { - bool res_nullable = (lhs_is_nullable || rhs_is_nullable); - if (!res_nullable) { + if (!result_is_nullable) { *result_column_id = lhs_id; for (size_t i = 0; i < size; i++) { lhs_data_column[i] &= rhs_data_column[i]; } } else { - auto col_res = ColumnUInt8::create(size); - auto col_nulls = ColumnUInt8::create(size); - auto* __restrict res_datas = - assert_cast(col_res)->get_data().data(); - auto* __restrict res_nulls = - assert_cast(col_nulls)->get_data().data(); - ColumnPtr temp_null_map = nullptr; - if ((lhs_is_nullable && !rhs_is_nullable) || - (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable - if (lhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - lhs_null_map = assert_cast( - temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - if (rhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - rhs_null_map = assert_cast( - temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - } - - for (size_t i = 0; i < size; ++i) { - res_nulls[i] = - (lhs_null_map[i] & rhs_null_map[i]) | - (rhs_null_map[i] & (lhs_null_map[i] ^ lhs_data_column[i])) | - (lhs_null_map[i] & (rhs_null_map[i] ^ rhs_data_column[i])); - res_datas[i] = lhs_data_column[i] & rhs_data_column[i]; - } - auto result_column = - ColumnNullable::create(std::move(col_res), std::move(col_nulls)); - auto result_type = make_nullable(std::make_shared()); - *result_column_id = block->columns(); - block->insert({std::move(result_column), result_type, _expr_name}); + vector_vector_null.template operator()(); } } } @@ -178,65 +206,44 @@ class VCompoundPred : public VectorizedFnCall { // false or NULL ----> NULL if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { // true or any = true, return lhs - *result_column_id = lhs_id; + if (result_is_nullable && !lhs_is_nullable) { + create_nullable_result_column(lhs_column); + } else { + *result_column_id = lhs_id; + } } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false or any = any, return rhs - *result_column_id = rhs_id; + if (result_is_nullable && !rhs_is_nullable) { + create_nullable_result_column(rhs_column); + } else { + *result_column_id = rhs_id; + } } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any or true = true, return rhs - *result_column_id = rhs_id; + if (result_is_nullable && !rhs_is_nullable) { + create_nullable_result_column(rhs_column); + } else { + *result_column_id = rhs_id; + } } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any or false = any, return lhs - *result_column_id = lhs_id; + if (result_is_nullable && !lhs_is_nullable) { + create_nullable_result_column(lhs_column); + } else { + *result_column_id = lhs_id; + } } else { - bool res_nullable = (lhs_is_nullable || rhs_is_nullable); - if (!res_nullable) { + if (!result_is_nullable) { *result_column_id = lhs_id; for (size_t i = 0; i < size; i++) { lhs_data_column[i] |= rhs_data_column[i]; } } else { - auto col_res = ColumnUInt8::create(size); - auto col_nulls = ColumnUInt8::create(size); - auto* __restrict res_datas = - assert_cast(col_res)->get_data().data(); - auto* __restrict res_nulls = - assert_cast(col_nulls)->get_data().data(); - ColumnPtr temp_null_map = nullptr; - if ((lhs_is_nullable && !rhs_is_nullable) || - (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable - if (lhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - lhs_null_map = assert_cast( - temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - if (rhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - rhs_null_map = assert_cast( - temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - } - - for (size_t i = 0; i < size; ++i) { - res_nulls[i] = - (lhs_null_map[i] & rhs_null_map[i]) | - (rhs_null_map[i] & (rhs_null_map[i] ^ lhs_data_column[i])) | - (lhs_null_map[i] & (lhs_null_map[i] ^ rhs_data_column[i])); - res_datas[i] = lhs_data_column[i] | rhs_data_column[i]; - } - auto result_column = - ColumnNullable::create(std::move(col_res), std::move(col_nulls)); - auto result_type = make_nullable(std::make_shared()); - *result_column_id = block->columns(); - block->insert({std::move(result_column), result_type, _expr_name}); + vector_vector_null.template operator()(); } } } @@ -250,6 +257,15 @@ class VCompoundPred : public VectorizedFnCall { bool is_compound_predicate() const override { return true; } private: + static inline constexpr uint8 apply_and_null(UInt8 a, UInt8 l_null, UInt8 b, UInt8 r_null) { + // (<> && false) is false, (true && NULL) is NULL + return (l_null & r_null) | (r_null & (l_null ^ a)) | (l_null & (r_null ^ b)); + } + static inline constexpr uint8 apply_or_null(UInt8 a, UInt8 l_null, UInt8 b, UInt8 r_null) { + // (<> || true) is true, (false || NULL) is NULL + return (l_null & r_null) | (r_null & (r_null ^ a)) | (l_null & (l_null ^ b)); + } + bool _all_child_is_compound_and_not_const() const { for (auto child : _children) { // we can make sure non const compound predicate's return column is allow modifyied locally. From e569a6b3fcd4b20521526d7c88e6a182373eefbd Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Tue, 31 Oct 2023 22:16:36 +0800 Subject: [PATCH 3/5] add test case --- .../data/correctness_p0/test_and_or.out | 77 +++++++++++++++++++ .../suites/correctness_p0/test_and_or.groovy | 74 ++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 regression-test/data/correctness_p0/test_and_or.out create mode 100644 regression-test/suites/correctness_p0/test_and_or.groovy diff --git a/regression-test/data/correctness_p0/test_and_or.out b/regression-test/data/correctness_p0/test_and_or.out new file mode 100644 index 00000000000000..96cb7c10a94738 --- /dev/null +++ b/regression-test/data/correctness_p0/test_and_or.out @@ -0,0 +1,77 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select3 -- +1 \N true false true false true true +2 \N true false true false false false + +-- !select1 -- +false +false + +-- !select2 -- +false +false + +-- !select3 -- +true +true + +-- !select4 -- +true +true + +-- !select5 -- +false +true + +-- !select6 -- +false +true + +-- !select7 -- +false +false + +-- !select8 -- +\N +\N + +-- !select9 -- +false +true + +-- !select10 -- +false +false + +-- !select11 -- +\N +\N + +-- !select12 -- +\N +\N + +-- !select13 -- +\N +\N + +-- !select14 -- +\N +\N + +-- !select15 -- +false +true + +-- !select16 -- +false +true + +-- !select17 -- +false +true + +-- !select18 -- +false +false + diff --git a/regression-test/suites/correctness_p0/test_and_or.groovy b/regression-test/suites/correctness_p0/test_and_or.groovy new file mode 100644 index 00000000000000..5e3569e26dad5a --- /dev/null +++ b/regression-test/suites/correctness_p0/test_and_or.groovy @@ -0,0 +1,74 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// The cases is copied from https://github.com/trinodb/trino/tree/master +// /testing/trino-product-tests/src/main/resources/sql-tests/testcases/aggregate +// and modified by Doris. + +suite("test_and_or") { + def tableName = "test_and_or" + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE `test_and_or` ( + `id` int(11) NULL, + `k_null` boolean NULL, + `k_true` boolean NULL, + `k_false` boolean NULL, + `k_true_not` boolean not NULL, + `k_false_not` boolean not NULL, + `k_true_false` boolean not NULL, + `k_true_false_not` boolean not NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ insert into test_and_or values(1,NULL,true,false,true,false,true,true)""" + sql "insert into test_and_or values(2,NULL,true,false,true,false,false,false);" + + + qt_select3 """select * from test_and_or order by 1,2,3;""" + qt_select1 """ select (k_false_not and k_false_not) and (k_null or k_true) from test_and_or order by 1;""" + qt_select2 """ select (k_null and k_false) and (k_null or k_true) from test_and_or order by 1;""" + qt_select3 """ select (k_null or k_true) and (k_false_not or k_true_not) from test_and_or order by 1;""" + qt_select4 """ select (k_true_not or k_false_not) and (k_null or k_true) from test_and_or order by 1;""" + qt_select5 """ select (k_true_false_not or k_true_false) and (k_true_not or k_false_not) from test_and_or order by 1;""" + qt_select6 """ select (k_true_not or k_true_false) and (k_true_false_not or k_false_not) from test_and_or order by 1;""" + qt_select7 """ select (k_true_not and k_false_not) and (k_true_false_not or k_true) from test_and_or order by 1;""" + qt_select8 """ select (k_null or k_false) and (k_false_not or k_true_not) from test_and_or order by 1; """ + qt_select9 """ select (k_false and k_true_not) or (k_true and k_true_false_not) from test_and_or order by 1;""" + qt_select10 """ select (k_false_not and k_true_not) or (k_null and k_false) from test_and_or order by 1;""" + qt_select11 """ select (k_false_not and k_false_not) or (k_null and k_true) from test_and_or order by 1;""" + qt_select12 """ select (k_null and k_false) or (k_null and k_true) from test_and_or order by 1;""" + qt_select13 """ select (k_null and k_true) or (k_false_not and k_true_not) from test_and_or order by 1;""" + qt_select14 """ select (k_true_not and k_false_not) or (k_null and k_true) from test_and_or order by 1;""" + qt_select15 """ select (k_true_false_not and k_true_false) or (k_true_not and k_false_not) from test_and_or order by 1;""" + qt_select16 """ select (k_true_not and k_true_false) or(k_true_false_not and k_false_not) from test_and_or order by 1;""" + qt_select17 """ select (k_true_not and k_false_not) or (k_true_false_not and k_true) from test_and_or order by 1;""" + qt_select18 """ select (k_null and k_false) or (k_false_not and k_true_not) from test_and_or order by 1; """ +} From f4d1f8d3fe2095871e13eeee8a8a8e0895ea7f29 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Wed, 1 Nov 2023 18:40:39 +0800 Subject: [PATCH 4/5] update test case --- .../data/correctness_p0/test_and_or.out | 172 +++++++++++++++++- .../suites/correctness_p0/test_and_or.groovy | 31 +++- 2 files changed, 198 insertions(+), 5 deletions(-) diff --git a/regression-test/data/correctness_p0/test_and_or.out b/regression-test/data/correctness_p0/test_and_or.out index 96cb7c10a94738..535d01c2273bfc 100644 --- a/regression-test/data/correctness_p0/test_and_or.out +++ b/regression-test/data/correctness_p0/test_and_or.out @@ -1,77 +1,247 @@ -- This file is automatically generated. You should know what you did if you want to edit this --- !select3 -- +-- !select1 -- 1 \N true false true false true true 2 \N true false true false false false +3 \N true false true false false true +4 \N true false true false true false + +-- !select3 -- +false +false +false +false + +-- !select3 -- +false +false +false +false + +-- !select3 -- +true +true +true +true + +-- !select3 -- +true +true +true +true + +-- !select3 -- +\N +\N +false +false + +-- !select3 -- +false +false +false +false + +-- !select3 -- +false +true +true +true + +-- !select3 -- +\N +\N +\N +\N + +-- !select3 -- +false +false +true +true + +-- !select3 -- +false +false +false +true + +-- !select3 -- +false +false +true +true + +-- !select3 -- +false +false +true +true + +-- !select3 -- +false +false +false +false + +-- !select3 -- +\N +\N +\N +\N + +-- !select3 -- +true +true +true +true + +-- !select3 -- +true +true +true +true + +-- !select3 -- +false +false +true +true + +-- !select3 -- +\N +\N +\N +\N + +-- !select3 -- +false +true +true +true + +-- !select3 -- +false +false +false +true + +-- !select3 -- +false +false +false +true + +-- !select3 -- +false +false +true +true -- !select1 -- false false +false +false -- !select2 -- false false +false +false -- !select3 -- true true +true +true -- !select4 -- true true +true +true -- !select5 -- false true +true +true -- !select6 -- false +false +true true -- !select7 -- false false +false +false -- !select8 -- \N \N +\N +\N -- !select9 -- false +false +true true -- !select10 -- false false +false +false -- !select11 -- \N \N +\N +\N -- !select12 -- \N \N +\N +\N -- !select13 -- \N \N +\N +\N -- !select14 -- \N \N +\N +\N -- !select15 -- false +false +false true -- !select16 -- false +false +true true -- !select17 -- false +false +true true -- !select18 -- false false +false +false diff --git a/regression-test/suites/correctness_p0/test_and_or.groovy b/regression-test/suites/correctness_p0/test_and_or.groovy index 5e3569e26dad5a..8ad00122baff27 100644 --- a/regression-test/suites/correctness_p0/test_and_or.groovy +++ b/regression-test/suites/correctness_p0/test_and_or.groovy @@ -48,11 +48,34 @@ suite("test_and_or") { ); """ - sql """ insert into test_and_or values(1,NULL,true,false,true,false,true,true)""" + sql """ insert into test_and_or values(1,NULL,true,false,true,false,true,true);""" sql "insert into test_and_or values(2,NULL,true,false,true,false,false,false);" - - - qt_select3 """select * from test_and_or order by 1,2,3;""" + sql "insert into test_and_or values(3,NULL,true,false,true,false,false,true);" + sql "insert into test_and_or values(4,NULL,true,false,true,false,true,false);" + qt_select1 """select * from test_and_or order by 1,2,3;""" + qt_select3 """ select (k_false_not and non_nullable(k_false)) and (k_null or k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_null and k_false) and (k_null or k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_null or k_true) and (k_false_not or k_true_not) from test_and_or order by 1; """ + qt_select3 """ select (k_true_not or k_false_not) and (k_null or k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not and k_true_false_not) from test_and_or order by 1; """ + qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not and k_false) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false_not or k_true_false) and (k_true_not or k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_null or k_false) and (k_false_not or k_true_not) from test_and_or order by 1; """ + qt_select3 """ select (k_true_not and k_true_false_not or k_true_false) and (k_false_not or k_true_false_not) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false or k_false) and (k_false_not or k_true_false_not) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false and k_false) or (k_false_not and k_true_false_not) from test_and_or order by 1; """ + qt_select3 """ select (k_false_not and k_true_not) or (k_null and k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_false_not and k_true or k_true_false_not) or (k_true_not and non_nullable(k_true)) from test_and_or order by 1; """ + qt_select3 """ select (k_false_not and k_true_not or k_true_false_not) or (k_true_not and k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false_not and k_true_not) or (k_false and k_true) from test_and_or order by 1; """ + qt_select3 """ select (k_null and k_true) or (k_false_not and k_false) from test_and_or order by 1; """ + qt_select3 """ select (k_false_not and k_true_not or k_true_false) or (k_true_not and k_true_false_not) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false and k_true_false_not) or (k_false_not and k_true_not or k_false) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false and k_true_false_not) or (k_false_not and k_true_not or k_false) from test_and_or order by 1; """ + qt_select3 """ select (k_true_false and k_true_false_not) or (cast(id as boolean) and (k_true_false_not)) from test_and_or order by 1; """ + qt_select1 """ select (k_false_not and k_false_not) and (k_null or k_true) from test_and_or order by 1;""" qt_select2 """ select (k_null and k_false) and (k_null or k_true) from test_and_or order by 1;""" qt_select3 """ select (k_null or k_true) and (k_false_not or k_true_not) from test_and_or order by 1;""" From 321fdad78d0dce681f024de0ea7af81eaf741a43 Mon Sep 17 00:00:00 2001 From: zhangstar333 <2561612514@qq.com> Date: Fri, 3 Nov 2023 16:21:46 +0800 Subject: [PATCH 5/5] update review --- be/src/vec/exprs/vcompound_pred.h | 92 +++++++++++-------------------- 1 file changed, 31 insertions(+), 61 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 2326accdff9ece..708b3093a06141 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -61,7 +61,6 @@ class VCompoundPred : public VectorizedFnCall { int lhs_id = -1; int rhs_id = -1; RETURN_IF_ERROR(_children[0]->execute(context, block, &lhs_id)); - //should deal with const column ? ColumnPtr lhs_column = block->get_by_position(lhs_id).column; size_t size = lhs_column->size(); bool lhs_is_nullable = lhs_column->is_nullable(); @@ -105,11 +104,25 @@ class VCompoundPred : public VectorizedFnCall { return Status::OK(); }; - auto create_nullable_result_column = [&](ColumnPtr nested_column) { - auto result_column = - ColumnNullable::create(nested_column, ColumnUInt8::create(size, 0)); - *result_column_id = block->columns(); - block->insert({std::move(result_column), _data_type, _expr_name}); + auto return_result_column_id = [&](ColumnPtr res_column, int res_id) -> int { + if (result_is_nullable && !res_column->is_nullable()) { + auto result_column = + ColumnNullable::create(res_column, ColumnUInt8::create(size, 0)); + res_id = block->columns(); + block->insert({std::move(result_column), _data_type, _expr_name}); + } + return res_id; + }; + + auto create_null_map_column = [&](ColumnPtr null_map_column, + uint8* __restrict null_map_data) { + if (null_map_data == nullptr) { + null_map_column = ColumnUInt8::create(size, 0); + null_map_data = assert_cast(null_map_column->assume_mutable().get()) + ->get_data() + .data(); + } + return null_map_data; }; auto vector_vector_null = [&]() { @@ -118,21 +131,10 @@ class VCompoundPred : public VectorizedFnCall { auto* __restrict res_datas = assert_cast(col_res)->get_data().data(); auto* __restrict res_nulls = assert_cast(col_nulls)->get_data().data(); ColumnPtr temp_null_map = nullptr; - if ((lhs_is_nullable && !rhs_is_nullable) || - (!lhs_is_nullable || rhs_is_nullable)) { // one of children is nullable - if (lhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - lhs_null_map = assert_cast(temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - if (rhs_null_map == nullptr) { - temp_null_map = ColumnUInt8::create(size, 0); - rhs_null_map = assert_cast(temp_null_map->assume_mutable().get()) - ->get_data() - .data(); - } - } + // maybe both children are nullable / or one of children is nullable + lhs_null_map = create_null_map_column(temp_null_map, lhs_null_map); + rhs_null_map = create_null_map_column(temp_null_map, rhs_null_map); + if constexpr (is_and_op) { for (size_t i = 0; i < size; ++i) { res_nulls[i] = apply_and_null(lhs_data_column[i], lhs_null_map[i], @@ -158,38 +160,22 @@ class VCompoundPred : public VectorizedFnCall { //2. nullable column: null map all is not null if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false and any = false, return lhs - if (result_is_nullable && !lhs_is_nullable) { - create_nullable_result_column(lhs_column); - } else { - *result_column_id = lhs_id; - } + *result_column_id = return_result_column_id(lhs_column, lhs_id); } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_true && !lhs_is_nullable) || //not null column (lhs_all_true && lhs_all_is_not_null)) { //nullable column // true and any = any, return rhs - if (result_is_nullable && !rhs_is_nullable) { - create_nullable_result_column(rhs_column); - } else { - *result_column_id = rhs_id; - } + *result_column_id = return_result_column_id(rhs_column, rhs_id); } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any and false = false, return rhs - if (result_is_nullable && !rhs_is_nullable) { - create_nullable_result_column(rhs_column); - } else { - *result_column_id = rhs_id; - } + *result_column_id = return_result_column_id(rhs_column, rhs_id); } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any and true = any, return lhs - if (result_is_nullable && !lhs_is_nullable) { - create_nullable_result_column(lhs_column); - } else { - *result_column_id = lhs_id; - } + *result_column_id = return_result_column_id(lhs_column, lhs_id); } else { if (!result_is_nullable) { *result_column_id = lhs_id; @@ -206,36 +192,20 @@ class VCompoundPred : public VectorizedFnCall { // false or NULL ----> NULL if ((lhs_all_true && !lhs_is_nullable) || (lhs_all_true && lhs_all_is_not_null)) { // true or any = true, return lhs - if (result_is_nullable && !lhs_is_nullable) { - create_nullable_result_column(lhs_column); - } else { - *result_column_id = lhs_id; - } + *result_column_id = return_result_column_id(lhs_column, lhs_id); } else { RETURN_IF_ERROR(get_rhs_colum()); if ((lhs_all_false && !lhs_is_nullable) || (lhs_all_false && lhs_all_is_not_null)) { // false or any = any, return rhs - if (result_is_nullable && !rhs_is_nullable) { - create_nullable_result_column(rhs_column); - } else { - *result_column_id = rhs_id; - } + *result_column_id = return_result_column_id(rhs_column, rhs_id); } else if ((rhs_all_true && !rhs_is_nullable) || (rhs_all_true && rhs_all_is_not_null)) { // any or true = true, return rhs - if (result_is_nullable && !rhs_is_nullable) { - create_nullable_result_column(rhs_column); - } else { - *result_column_id = rhs_id; - } + *result_column_id = return_result_column_id(rhs_column, rhs_id); } else if ((rhs_all_false && !rhs_is_nullable) || (rhs_all_false && rhs_all_is_not_null)) { // any or false = any, return lhs - if (result_is_nullable && !lhs_is_nullable) { - create_nullable_result_column(lhs_column); - } else { - *result_column_id = lhs_id; - } + *result_column_id = return_result_column_id(lhs_column, lhs_id); } else { if (!result_is_nullable) { *result_column_id = lhs_id;