From 0744cb3b84480f6c3134a116a2c0851e8f2f72c0 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 21 Mar 2024 17:01:35 +0800 Subject: [PATCH 01/16] first support array_contains for expr push down inverted index --- .../rowset/segment_v2/segment_iterator.cpp | 48 ++++++++++++++-- .../olap/rowset/segment_v2/segment_iterator.h | 1 + be/src/vec/exprs/vcompound_pred.h | 50 +++++++++++++++++ be/src/vec/exprs/vectorized_fn_call.cpp | 22 ++++++++ be/src/vec/exprs/vectorized_fn_call.h | 7 +++ be/src/vec/exprs/vexpr.h | 11 ++++ be/src/vec/exprs/vexpr_context.cpp | 7 +++ be/src/vec/exprs/vexpr_context.h | 17 ++++++ .../functions/array/function_array_index.h | 56 +++++++++++++++++++ be/src/vec/functions/function.h | 38 ++++++++++++- 10 files changed, 251 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index ad5b25f6620362..c74cbbb391d0a8 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -362,6 +362,12 @@ Status SegmentIterator::_lazy_init() { _segment->_tablet_schema->cluster_key_idxes().empty()) { RETURN_IF_ERROR(_get_row_ranges_by_keys()); } + _is_common_expr_column.resize(_schema->columns().size(), false); + if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { + for (auto expr : _remaining_conjunct_roots) { + RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + } + } RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); RETURN_IF_ERROR(_vec_init_lazy_materialization()); // Remove rows that have been marked deleted @@ -814,6 +820,17 @@ bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() { return true; } +bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) { + if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { + return false; + } + if (_inverted_index_iterators[col_id] == nullptr) { + //this column 
without inverted index + return false; + } + return true; +} + bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) { if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { return false; @@ -1203,6 +1220,33 @@ Status SegmentIterator::_apply_inverted_index() { } } + // support expr to evaluate inverted index + std::unordered_map> + iter_map; + + for (auto col_id : _common_expr_columns) { + if (_check_apply_by_inverted_index(col_id)) { + iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], + _inverted_index_iterators[col_id].get()); + } + } + for (auto exprCtx : _common_expr_ctxs_push_down) { + // _inverted_index_iterators has all column ids which has inverted index + // _common_expr_columns has all column ids from _common_expr_ctxs_push_down + // if current bitmap is already empty just return + if (_row_bitmap.isEmpty()) { + break; + } + roaring::Roaring bitmap = _row_bitmap; + const Status st = exprCtx->eval_inverted_indexs(iter_map, num_rows(), &bitmap); + if (!st.ok()) { + LOG(WARNING) << "failed to evaluate index in expr" << exprCtx->root()->debug_string() + << ", error msg: " << st; + } else { + _row_bitmap &= bitmap; + } + } + _col_predicates = std::move(remaining_predicates); _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality()); return Status::OK(); @@ -1574,11 +1618,7 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } // Step2: extract columns that can execute expr context - _is_common_expr_column.resize(_schema->columns().size(), false); if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { - for (auto expr : _remaining_conjunct_roots) { - RETURN_IF_ERROR(_extract_common_expr_columns(expr)); - } if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema->column_ids()) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h 
b/be/src/olap/rowset/segment_v2/segment_iterator.h index fb039246384fea..3726eaa486f0a9 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -284,6 +284,7 @@ class SegmentIterator : public RowwiseIterator { void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate); + bool _check_apply_by_inverted_index(ColumnId col_id); bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false); std::string _gen_predicate_result_sign(ColumnPredicate* predicate); diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 2ede99cae63c77..d03b05f4c08c9a 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -53,6 +53,56 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } + bool is_all_ones(const roaring::Roaring& r) { + return r.contains(0); + for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != r.end(); ++i) { + if (*i == 0) { + return false; + } + } + return true; + } + + // 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted + // 2. when meet 'and' conjunct, function with column b can not apply inverted index + // eg. 
a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted + // but a and array_contains(b, 1), b can be applied inverted index, which b can be extracted + Status eval_inverted_index( + VExprContext* context, + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const override { + if (_op == TExprOpcode::COMPOUND_OR) { + for (auto child : _children) { + Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, + num_rows, bitmap); + if (!st.ok()) { + return st; + } + if (!bitmap->contains( + 0)) { // the left expr no need to be extracted by inverted index + return Status::OK(); + } + } + } else if (_op == TExprOpcode::COMPOUND_AND) { + for (auto child : _children) { + Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, + num_rows, bitmap); + if (!st.ok()) { + return st; + } + if (bitmap->isEmpty()) { // the left expr no need to be extracted by inverted index + return Status::OK(); + } + } + } else { + return Status::InternalError( + "Compound operator must be AND or OR can execute with inverted index."); + } + return Status::OK(); + } + Status execute(VExprContext* context, Block* block, int* result_column_id) override { if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { return VectorizedFnCall::execute(context, block, result_column_id); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index d039a4e3a32e54..c96bfb707902d7 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -136,6 +136,28 @@ void VectorizedFnCall::close(VExprContext* context, FunctionContext::FunctionSta VExpr::close(context, scope); } +Status VectorizedFnCall::eval_inverted_index( + VExprContext* context, + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const { + 
DCHECK_GE(get_num_children(), 1); + if (get_child(0)->is_slot_ref()) { + auto* column_slot_ref = assert_cast(get_child(0).get()); + if (auto iter = colId_invertedIndexIter_mapping.find(column_slot_ref->column_id()); + iter != colId_invertedIndexIter_mapping.end()) { + const auto& pair = iter->second; + return _function->eval_inverted_index(context->fn_context(_fn_context_index), + pair.first, pair.second, num_rows, bitmap); + } + } else { + return Status::InternalError("we can only eval inverted index for slot ref expr, but got ", + get_child(0)->expr_name()); + } + return Status::OK(); +} + Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, std::vector& args) { diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index 24cab0c94ba0d3..27e84c0343a40a 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -27,6 +27,7 @@ #include "udf/udf.h" #include "vec/core/column_numbers.h" #include "vec/exprs/vexpr.h" +#include "vec/exprs/vslot_ref.h" #include "vec/functions/function.h" namespace doris { @@ -50,6 +51,12 @@ class VectorizedFnCall : public VExpr { Status execute_runtime_fitler(doris::vectorized::VExprContext* context, doris::vectorized::Block* block, int* result_column_id, std::vector& args) override; + Status eval_inverted_index( + VExprContext* context, + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const override; Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; Status open(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) override; diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 9a6b514d03a82a..ce5e5324395fd2 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -30,6 +30,7 @@ #include #include "common/status.h" 
+#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "runtime/define_primitive_type.h" #include "runtime/large_int_value.h" #include "runtime/types.h" @@ -114,6 +115,16 @@ class VExpr { virtual Status execute(VExprContext* context, Block* block, int* result_column_id) = 0; + // execute current expr with inverted index to filter block. Given a roaringbitmap of match rows + virtual Status eval_inverted_index( + VExprContext* context, + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) const { + return Status::NotSupported("Not supported execute_with_inverted_index"); + } + // Only the 4th parameter is used in the runtime filter. In and MinMax need overwrite the // interface virtual Status execute_runtime_fitler(VExprContext* context, Block* block, diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index cdbf22a7209c05..e51a8b54e87790 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -119,6 +119,13 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal()); return _fn_contexts.size() - 1; } +Status VExprContext::eval_inverted_indexs( + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap) { + return _root->eval_inverted_index(this, colId_invertedIndexIter_mapping, num_rows, bitmap); +} Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) { if (vexpr_ctx == nullptr || block->rows() == 0) { diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 423e1aac12a540..24937a83e09940 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -25,6 +25,7 @@ #include "common/factory_creator.h" #include "common/status.h" +#include 
"olap/rowset/segment_v2/inverted_index_reader.h" #include "runtime/types.h" #include "udf/udf.h" #include "vec/core/block.h" @@ -69,6 +70,22 @@ class VExprContext { return _fn_contexts[i].get(); } + // execute expr with inverted index which column a, b has inverted indexes + // but some situation although column b has indexes, but apply index is not useful, we should + // skip this expr, just do not apply index anymore. + /** + * @param name_with_types all columns with name and type in all _common_expr_ctxs_push_down see in SegmentIterator.h. + * @param inverted_indexs_iter columns which extracted from _common_expr_ctxs_push_down and has inverted index. + * @param num_rows number of rows in one segment. + * @param bitmap roaring bitmap to store the result. 0 is present filed by index. + * @return status not ok means execute failed. + */ + [[nodiscard]] Status eval_inverted_indexs( + const std::unordered_map>& + colId_invertedIndexIter_mapping, + uint32_t num_rows, roaring::Roaring* bitmap); + [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep); diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index d4a8fa32962fab..d1070b3e9d652e 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -25,6 +25,10 @@ #include #include "common/status.h" +#include "olap/column_predicate.h" +#include "olap/rowset/segment_v2/inverted_index_query_type.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" +#include "runtime/primitive_type.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_nullable.h" @@ -70,6 +74,11 @@ struct ArrayCountEqual { static constexpr void apply(ResultType& current, size_t j) noexcept { ++current; } }; +struct ParamValue { + PrimitiveType type; + StringRef tmp; +}; + template class FunctionArrayIndex : public IFunction { public: 
@@ -87,6 +96,53 @@ class FunctionArrayIndex : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + + DCHECK(context->get_num_args() >= 1); + DCHECK(context->get_arg_type(0)->is_array_type()); + // now we only support same + std::shared_ptr state = std::make_shared(); + // Field field; + state->tmp = context->get_constant_col(1)->column_ptr->get_data_at(0); + state->type = context->get_arg_type(1)->type; + context->set_function_state(scope, state); + return Status::OK(); + } + + /** + * eval inverted index. we can filter array rows with inverted index iter + */ + Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const override { + std::shared_ptr roaring = std::make_shared(); + auto* param_value = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + + RETURN_IF_ERROR(iter->read_from_inverted_index( + data_type_with_name.first, ¶m_value->tmp, + segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, num_rows, roaring)); + + // mask out null_bitmap, since NULL cmp VALUE will produce NULL + // and be treated as false in WHERE + // keep it after query, since query will try to read null_bitmap and put it to cache + if (iter->has_null()) { + segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + std::shared_ptr null_bitmap = null_bitmap_cache_handle.get_bitmap(); + if (null_bitmap) { + *bitmap -= *null_bitmap; + } + } + + *bitmap &= *roaring; + return Status::OK(); + } + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { if constexpr (OldVersion) { return 
make_nullable(std::make_shared>()); diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index df5d8d60942e30..26630a2af4efbf 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -31,6 +31,7 @@ #include "common/exception.h" #include "common/status.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "udf/udf.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" @@ -62,8 +63,10 @@ void has_variadic_argument_types(...); template concept HasGetVariadicArgumentTypesImpl = requires(T t) { - { t.get_variadic_argument_types_impl() } -> std::same_as; -}; + { + t.get_variadic_argument_types_impl() + } -> std::same_as; + }; bool have_null_column(const Block& block, const ColumnNumbers& args); bool have_null_column(const ColumnsWithTypeAndName& args); @@ -179,6 +182,14 @@ class IFunctionBase { ->execute(context, block, arguments, result, input_rows_count, dry_run); } + virtual Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const { + return Status::NotSupported("eval_inverted_index is not supported in function: ", + get_name()); + } + /// Do cleaning work when function is finished, i.e., release state variables in the /// `FunctionContext` which are registered in `prepare` phase. 
virtual Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) { @@ -395,6 +406,14 @@ class IFunction : public std::enable_shared_from_this, return Status::OK(); } + Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const override { + LOG(FATAL) << "eval_inverted_index is not implemented for IFunction"; + __builtin_unreachable(); + } + [[noreturn]] const DataTypes& get_argument_types() const final { LOG(FATAL) << "get_argument_types is not implemented for IFunction"; __builtin_unreachable(); @@ -427,6 +446,14 @@ class DefaultExecutable final : public PreparedFunctionImpl { size_t result, size_t input_rows_count) const final { return function->execute_impl(context, block, arguments, result, input_rows_count); } + + Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const { + return function->eval_inverted_index(context, data_type_with_name, iter, num_rows, bitmap); + } + Status execute_impl_dry_run(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const final { @@ -490,6 +517,13 @@ class DefaultFunction final : public IFunctionBase { function_name == "gt" || function_name == "le" || function_name == "ge"; } + Status eval_inverted_index(FunctionContext* context, + const vectorized::NameAndTypePair& data_type_with_name, + segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, + roaring::Roaring* bitmap) const override { + return function->eval_inverted_index(context, data_type_with_name, iter, num_rows, bitmap); + } + IFunctionBase::Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left, const Field& right) const override { return 
function->get_monotonicity_for_range(type, left, right); From 5bfddbd42ea94ec29f196dac7cd303b867bd40d9 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 27 Mar 2024 16:43:11 +0800 Subject: [PATCH 02/16] update query value --- .../segment_v2/inverted_index_reader.cpp | 29 ++++--- .../rowset/segment_v2/inverted_index_reader.h | 80 +++++++++++++++++-- be/src/vec/exprs/vexpr_context.h | 3 +- .../functions/array/function_array_index.h | 13 ++- 4 files changed, 103 insertions(+), 22 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 11c53bbabc03c2..b09f4dcb467cf1 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -252,11 +252,12 @@ Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - - std::string search_str = reinterpret_cast(query_value)->to_string(); + auto&& storage_value = + PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); + std::string search_str = reinterpret_cast(storage_value)->to_string(); LOG(INFO) << column_name << " begin to search the fulltext index from clucene, query_str [" << search_str << "]"; @@ -382,11 +383,13 @@ Status StringTypeInvertedIndexReader::new_iterator( Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - - const auto* 
search_query = reinterpret_cast(query_value); + auto&& storage_value = + PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); + const auto* search_query = reinterpret_cast(storage_value); auto act_len = strnlen(search_query->data, search_query->size); std::string search_str(search_query->data, act_len); // std::string search_str = reinterpret_cast(query_value)->to_string(); @@ -674,7 +677,7 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); @@ -688,7 +691,10 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_ return st; } std::string query_str; - _value_key_coder->full_encode_ascending(query_value, &query_str); + + auto&& storage_value = + PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); + _value_key_coder->full_encode_ascending(storage_value, &query_str); auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -1125,8 +1131,9 @@ lucene::util::bkd::relation InvertedIndexVisitor::compare(std::vector& bit_map, bool skip_try) { + const std::string& column_name, const void* query_value, PrimitiveType primitiveType, + InvertedIndexQueryType query_type, uint32_t segment_num_rows, + std::shared_ptr& bit_map, bool skip_try) { if (UNLIKELY(_reader == nullptr)) { throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); } @@ -1147,8 +1154,8 @@ Status InvertedIndexIterator::read_from_inverted_index( } } - RETURN_IF_ERROR( - _reader->query(_stats, _runtime_state, column_name, query_value, 
query_type, bit_map)); + RETURN_IF_ERROR(_reader->query(_stats, _runtime_state, column_name, query_value, primitiveType, + query_type, bit_map)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 63002da5c92c0e..76579d1820193c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -72,6 +72,75 @@ class InvertedIndexIterator; class InvertedIndexQueryCacheHandle; class InvertedIndexFileReader; +struct PrimitiveTypeConvertorHelper { + static const void* convert_to_primitive_type(const PrimitiveType& primitiveType, + const void* value) { + switch (primitiveType) { + case PrimitiveType::TYPE_BOOLEAN: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_TINYINT: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_SMALLINT: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_INT: + return reinterpret_cast::StorageFieldType*>(value); + case PrimitiveType::TYPE_BIGINT: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_LARGEINT: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_FLOAT: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DOUBLE: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DECIMALV2: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DECIMAL32: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DECIMAL64: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DECIMAL128I: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DECIMAL256: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_DATE: + return 
reinterpret_cast::StorageFieldType*>(value); + case PrimitiveType::TYPE_DATETIME: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_CHAR: + return reinterpret_cast::StorageFieldType*>(value); + case PrimitiveType::TYPE_VARCHAR: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_HLL: + return reinterpret_cast::StorageFieldType*>(value); + case PrimitiveType::TYPE_STRING: + return reinterpret_cast::StorageFieldType*>( + value); + case PrimitiveType::TYPE_IPV4: + return reinterpret_cast::StorageFieldType*>(value); + case PrimitiveType::TYPE_IPV6: + return reinterpret_cast::StorageFieldType*>(value); + default: + LOG(FATAL) << "Unsupported primitive type for inverted index reader : " + << primitiveType; + return nullptr; + } + }; +}; + class InvertedIndexReader : public std::enable_shared_from_this { public: explicit InvertedIndexReader( @@ -86,7 +155,7 @@ class InvertedIndexReader : public std::enable_shared_from_this* iterator) = 0; virtual Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) = 0; virtual Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -160,7 +229,7 @@ class FullTextIndexReader : public InvertedIndexReader { std::unique_ptr* iterator) override; Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -193,7 +262,7 @@ class 
StringTypeInvertedIndexReader : public InvertedIndexReader { std::unique_ptr* iterator) override; Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -253,7 +322,7 @@ class BkdIndexReader : public InvertedIndexReader { Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -285,7 +354,8 @@ class InvertedIndexIterator { : _stats(stats), _runtime_state(runtime_state), _reader(std::move(reader)) {} Status read_from_inverted_index(const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, uint32_t segment_num_rows, + PrimitiveType primitiveType, InvertedIndexQueryType query_type, + uint32_t segment_num_rows, std::shared_ptr& bit_map, bool skip_try = false); Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 24937a83e09940..aa48edad02967b 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -74,8 +74,7 @@ class VExprContext { // but some situation although column b has indexes, but apply index is not useful, we should // skip this expr, just do not apply index anymore. 
/** - * @param name_with_types all columns with name and type in all _common_expr_ctxs_push_down see in SegmentIterator.h. - * @param inverted_indexs_iter columns which extracted from _common_expr_ctxs_push_down and has inverted index. + * @param colId_invertedIndexIter_mapping contains all column id to inverted index iterator mapping from segmentIterator * @param num_rows number of rows in one segment. * @param bitmap roaring bitmap to store the result. 0 is present filed by index. * @return status not ok means execute failed. diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index d1070b3e9d652e..8ff2bd32601b87 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -76,7 +76,7 @@ struct ArrayCountEqual { struct ParamValue { PrimitiveType type; - StringRef tmp; + Field query_value; }; template @@ -105,8 +105,13 @@ class FunctionArrayIndex : public IFunction { DCHECK(context->get_arg_type(0)->is_array_type()); // now we only support same std::shared_ptr state = std::make_shared(); - // Field field; - state->tmp = context->get_constant_col(1)->column_ptr->get_data_at(0); + Field field; + context->get_constant_col(1)->column_ptr->get(0, field); + if (field == nullptr) { + return Status::InternalError("field is nullptr"); + } else { + state->query_value = field; + } state->type = context->get_arg_type(1)->type; context->set_function_state(scope, state); return Status::OK(); @@ -124,7 +129,7 @@ class FunctionArrayIndex : public IFunction { context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, ¶m_value->tmp, + data_type_with_name.first, ¶m_value->query_value, param_value->type, segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, num_rows, roaring)); // mask out null_bitmap, since NULL cmp VALUE will produce NULL From 
f6612b6964f3b42fbd39895f21a5bd4303f6652b Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 27 Mar 2024 18:04:01 +0800 Subject: [PATCH 03/16] format be --- be/src/vec/functions/function.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 26630a2af4efbf..8370449a28eca4 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -63,10 +63,8 @@ void has_variadic_argument_types(...); template concept HasGetVariadicArgumentTypesImpl = requires(T t) { - { - t.get_variadic_argument_types_impl() - } -> std::same_as; - }; + { t.get_variadic_argument_types_impl() } -> std::same_as; +}; bool have_null_column(const Block& block, const ColumnNumbers& args); bool have_null_column(const ColumnsWithTypeAndName& args); From 038af1b00e3965309b02bf7699875301f3ec90a5 Mon Sep 17 00:00:00 2001 From: amorynan Date: Sat, 6 Apr 2024 09:31:57 +0800 Subject: [PATCH 04/16] update inverted index query param value --- be/src/olap/comparison_predicate.h | 8 +- be/src/olap/in_list_predicate.h | 11 +- .../segment_v2/inverted_index_reader.cpp | 89 +++++++-- .../rowset/segment_v2/inverted_index_reader.h | 174 ++++++++++-------- .../functions/array/function_array_index.h | 9 +- 5 files changed, 188 insertions(+), 103 deletions(-) diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 19e92a41bca5f7..28529dcceba373 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -101,9 +101,11 @@ class ComparisonPredicateBase : public ColumnPredicate { std::shared_ptr roaring = std::make_shared(); - auto&& value = PrimitiveTypeConvertor::to_storage_field_type(_value); - RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type, - num_rows, roaring)); + std::unique_ptr query_param = nullptr; + RETURN_IF_ERROR( + InvertedIndexQueryParamFactory::create_query_value(&_value, query_param)); + 
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, query_param->get_value(), + query_type, num_rows, roaring)); // mask out null_bitmap, since NULL cmp VALUE will produce NULL // and be treated as false in WHERE diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index ec0e770ebd7b62..f710d3becce912 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -191,12 +191,15 @@ class InListPredicateBase : public ColumnPredicate { HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { const void* ptr = iter->get_value(); - auto&& value = PrimitiveTypeConvertor::to_storage_field_type( - *reinterpret_cast(ptr)); + // auto&& value = PrimitiveTypeConvertor::to_storage_field_type( + // *reinterpret_cast(ptr)); + std::unique_ptr query_param = nullptr; + RETURN_IF_ERROR( + InvertedIndexQueryParamFactory::create_query_value(ptr, query_param)); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; std::shared_ptr index = std::make_shared(); - RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type, - num_rows, index)); + RETURN_IF_ERROR(iterator->read_from_inverted_index( + column_name, query_param->get_value(), query_type, num_rows, index)); indices |= *index; iter->next(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index b09f4dcb467cf1..f70d31de3893cc 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -70,6 +70,66 @@ namespace doris::segment_v2 { +template +Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param) { + using CPP_TYPE = typename PrimitiveTypeTraits::CppType; + std::unique_ptr> param = + InvertedIndexQueryParam::create_unique(); + auto&& storage_val = PrimitiveTypeConvertor::to_storage_field_type( + 
*reinterpret_cast(value)); + param->set_value(&storage_val); + result_param = std::move(param); + return Status::OK(); +}; + +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status 
InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); +template Status InvertedIndexQueryParamFactory::create_query_value( + const void* value, std::unique_ptr& result_param); + std::unique_ptr InvertedIndexReader::create_analyzer( InvertedIndexCtx* inverted_index_ctx) { std::unique_ptr analyzer; @@ -252,12 +312,11 @@ Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - auto&& storage_value = - PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); - std::string search_str = reinterpret_cast(storage_value)->to_string(); + + std::string search_str = reinterpret_cast(query_value)->to_string(); LOG(INFO) << column_name << " begin to search the fulltext index from clucene, query_str [" << search_str << "]"; @@ -383,13 +442,11 @@ Status StringTypeInvertedIndexReader::new_iterator( Status 
StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); - auto&& storage_value = - PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); - const auto* search_query = reinterpret_cast(storage_value); + + const auto* search_query = reinterpret_cast(query_value); auto act_len = strnlen(search_query->data, search_query->size); std::string search_str(search_query->data, act_len); // std::string search_str = reinterpret_cast(query_value)->to_string(); @@ -677,7 +734,7 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); @@ -691,10 +748,7 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_ return st; } std::string query_str; - - auto&& storage_value = - PrimitiveTypeConvertorHelper::convert_to_primitive_type(primitiveType, query_value); - _value_key_coder->full_encode_ascending(storage_value, &query_str); + _value_key_coder->full_encode_ascending(query_value, &query_str); auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -1131,9 +1185,8 @@ lucene::util::bkd::relation InvertedIndexVisitor::compare(std::vector& bit_map, bool skip_try) { + const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, + uint32_t segment_num_rows, std::shared_ptr& bit_map, bool skip_try) 
{ if (UNLIKELY(_reader == nullptr)) { throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false); } @@ -1154,8 +1207,8 @@ Status InvertedIndexIterator::read_from_inverted_index( } } - RETURN_IF_ERROR(_reader->query(_stats, _runtime_state, column_name, query_value, primitiveType, - query_type, bit_map)); + RETURN_IF_ERROR( + _reader->query(_stats, _runtime_state, column_name, query_value, query_type, bit_map)); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 76579d1820193c..1e5541ef75eddb 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -33,6 +33,7 @@ #include "olap/rowset/segment_v2/inverted_index_desc.h" #include "olap/rowset/segment_v2/inverted_index_query_type.h" #include "olap/tablet_schema.h" +#include "runtime/primitive_type.h" #include "util/once.h" #define FINALIZE_INPUT(x) \ @@ -72,75 +73,6 @@ class InvertedIndexIterator; class InvertedIndexQueryCacheHandle; class InvertedIndexFileReader; -struct PrimitiveTypeConvertorHelper { - static const void* convert_to_primitive_type(const PrimitiveType& primitiveType, - const void* value) { - switch (primitiveType) { - case PrimitiveType::TYPE_BOOLEAN: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_TINYINT: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_SMALLINT: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_INT: - return reinterpret_cast::StorageFieldType*>(value); - case PrimitiveType::TYPE_BIGINT: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_LARGEINT: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_FLOAT: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DOUBLE: - return 
reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DECIMALV2: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DECIMAL32: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DECIMAL64: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DECIMAL128I: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DECIMAL256: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_DATE: - return reinterpret_cast::StorageFieldType*>(value); - case PrimitiveType::TYPE_DATETIME: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_CHAR: - return reinterpret_cast::StorageFieldType*>(value); - case PrimitiveType::TYPE_VARCHAR: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_HLL: - return reinterpret_cast::StorageFieldType*>(value); - case PrimitiveType::TYPE_STRING: - return reinterpret_cast::StorageFieldType*>( - value); - case PrimitiveType::TYPE_IPV4: - return reinterpret_cast::StorageFieldType*>(value); - case PrimitiveType::TYPE_IPV6: - return reinterpret_cast::StorageFieldType*>(value); - default: - LOG(FATAL) << "Unsupported primitive type for inverted index reader : " - << primitiveType; - return nullptr; - } - }; -}; - class InvertedIndexReader : public std::enable_shared_from_this { public: explicit InvertedIndexReader( @@ -155,7 +87,7 @@ class InvertedIndexReader : public std::enable_shared_from_this* iterator) = 0; virtual Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) = 0; virtual Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ 
-229,7 +161,7 @@ class FullTextIndexReader : public InvertedIndexReader { std::unique_ptr* iterator) override; Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -262,7 +194,7 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { std::unique_ptr* iterator) override; Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -322,7 +254,7 @@ class BkdIndexReader : public InvertedIndexReader { Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; Status try_query(OlapReaderStatistics* stats, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -345,6 +277,99 @@ class BkdIndexReader : public InvertedIndexReader { const KeyCoder* _value_key_coder {}; }; +/** + * @brief InvertedIndexQueryParamFactory is a factory class to create QueryValue object. + * we need a template function to make predict class like in_list_predict template class to use. + * also need a function with primitive type parameter to create inverted index query value. 
like some function expr: function_array_index + * Now we just mapping field value in query engine to storage field value + */ +class InvertedIndexQueryParamFactory { + ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory); + +public: + virtual ~InvertedIndexQueryParamFactory() = default; + + template + static Status create_query_value(const void* value, + std::unique_ptr& result_param); + + static Status create_query_value( + const PrimitiveType& primitiveType, const void* value, + std::unique_ptr& result_param) { + switch (primitiveType) { + case PrimitiveType::TYPE_BOOLEAN: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_TINYINT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_SMALLINT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_INT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_BIGINT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_LARGEINT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_FLOAT: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DOUBLE: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DECIMALV2: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DECIMAL32: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DECIMAL64: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DECIMAL128I: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DECIMAL256: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DATE: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_DATETIME: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_CHAR: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_VARCHAR: + return create_query_value(value, 
result_param); + case PrimitiveType::TYPE_HLL: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_STRING: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_IPV4: + return create_query_value(value, result_param); + case PrimitiveType::TYPE_IPV6: + return create_query_value(value, result_param); + default: + LOG(FATAL) << "Unsupported primitive type for inverted index reader : " + << primitiveType; + return Status::NotSupported("Unsupported primitive type for inverted index reader"); + } + }; + + virtual const void* get_value() const { + LOG_FATAL( + "Execution reached an undefined behavior code path in " + "InvertedIndexQueryParamFactory"); + __builtin_unreachable(); + }; +}; + +template +class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory { + ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam); + using storage_val = typename PrimitiveTypeTraits::StorageFieldType; + +public: + void set_value(const storage_val* value) { + _value = *reinterpret_cast(value); + } + + const void* get_value() const override { return &_value; } + +private: + storage_val _value; +}; + class InvertedIndexIterator { ENABLE_FACTORY_CREATOR(InvertedIndexIterator); @@ -354,8 +379,7 @@ class InvertedIndexIterator { : _stats(stats), _runtime_state(runtime_state), _reader(std::move(reader)) {} Status read_from_inverted_index(const std::string& column_name, const void* query_value, - PrimitiveType primitiveType, InvertedIndexQueryType query_type, - uint32_t segment_num_rows, + InvertedIndexQueryType query_type, uint32_t segment_num_rows, std::shared_ptr& bit_map, bool skip_try = false); Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 8ff2bd32601b87..aa9c3550e3523e 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ 
b/be/src/vec/functions/array/function_array_index.h @@ -107,8 +107,8 @@ class FunctionArrayIndex : public IFunction { std::shared_ptr state = std::make_shared(); Field field; context->get_constant_col(1)->column_ptr->get(0, field); - if (field == nullptr) { - return Status::InternalError("field is nullptr"); + if (field.is_null()) { + return Status::InternalError("field is null"); } else { state->query_value = field; } @@ -128,8 +128,11 @@ class FunctionArrayIndex : public IFunction { auto* param_value = reinterpret_cast( context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + std::unique_ptr query_param = nullptr; + RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( + param_value->type, &param_value->query_value, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( - data_type_with_name.first, &param_value->query_value, param_value->type, + data_type_with_name.first, query_param->get_value(), segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, num_rows, roaring)); // mask out null_bitmap, since NULL cmp VALUE will produce NULL From 16084f0ce01cedd0de0ffe45243b2f5cfc531027 Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 7 Apr 2024 18:17:46 +0800 Subject: [PATCH 05/16] fix some bugs and some log --- .../rowset/segment_v2/segment_iterator.cpp | 7 ++-- be/src/vec/exprs/vcompound_pred.h | 41 ++++++++++++++----- be/src/vec/exprs/vectorized_fn_call.cpp | 6 +-- be/src/vec/exprs/vectorized_fn_call.h | 2 +- be/src/vec/exprs/vexpr.h | 2 +- be/src/vec/exprs/vexpr_context.cpp | 4 +- be/src/vec/exprs/vexpr_context.h | 4 +- 7 files changed, 43 insertions(+), 23 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 92e234832d2fcb..5d9ba1cc377540 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1236,7 +1236,7 @@ Status SegmentIterator::_apply_inverted_index() { 
_inverted_index_iterators[col_id].get()); } } - for (auto exprCtx : _common_expr_ctxs_push_down) { + for (auto expr_ctx : _common_expr_ctxs_push_down) { // _inverted_index_iterators has all column ids which has inverted index // _common_expr_columns has all column ids from _common_expr_ctxs_push_down // if current bitmap is already empty just return @@ -1244,11 +1244,12 @@ Status SegmentIterator::_apply_inverted_index() { break; } roaring::Roaring bitmap = _row_bitmap; - const Status st = exprCtx->eval_inverted_indexs(iter_map, num_rows(), &bitmap); + const Status st = expr_ctx->eval_inverted_indexs(iter_map, num_rows(), &bitmap); if (!st.ok()) { - LOG(WARNING) << "failed to evaluate index in expr" << exprCtx->root()->debug_string() + LOG(WARNING) << "failed to evaluate index in expr" << expr_ctx->root()->debug_string() << ", error msg: " << st; } else { + // every single result of expr_ctx must be `and` collection relationship _row_bitmap &= bitmap; } } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index d03b05f4c08c9a..030ef95aedf24c 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -71,34 +71,53 @@ class VCompoundPred : public VectorizedFnCall { VExprContext* context, const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const override { if (_op == TExprOpcode::COMPOUND_OR) { for (auto child : _children) { - Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, - num_rows, bitmap); + std::shared_ptr child_roaring = + std::make_shared(); + Status st = child->eval_inverted_index(context, colId_to_inverted_index_iter, + num_rows, child_roaring.get()); if (!st.ok()) { - return st; + continue; } - if (!bitmap->contains( - 0)) { // the left expr no need to be extracted by inverted index + *bitmap |= *child_roaring; + if (!child_roaring->isEmpty()) { + // means inverted index filter 
do not reduce any rows + // the left expr no need to be extracted by inverted index, + // and cur roaring is all rows which means this inverted index is not useful, + // do not need to calculate with res bitmap return Status::OK(); } } } else if (_op == TExprOpcode::COMPOUND_AND) { for (auto child : _children) { - Status st = child->eval_inverted_index(context, colId_invertedIndexIter_mapping, - num_rows, bitmap); + std::shared_ptr child_roaring = + std::make_shared(); + Status st = child->eval_inverted_index(context, colId_to_inverted_index_iter, + num_rows, child_roaring.get()); if (!st.ok()) { - return st; + continue; } - if (bitmap->isEmpty()) { // the left expr no need to be extracted by inverted index + *bitmap &= *child_roaring; + if (child_roaring->isEmpty()) { + // the left expr no need to be extracted by inverted index, just return 0 rows + // res bitmap will be zero return Status::OK(); } } + } else if (_op == TExprOpcode::COMPOUND_NOT) { + std::shared_ptr child_roaring = std::make_shared(); + Status st = _children[0]->eval_inverted_index(context, colId_to_inverted_index_iter, + num_rows, child_roaring.get()); + if (!st.ok()) { + return st; + } + *bitmap -= *child_roaring; } else { return Status::InternalError( - "Compound operator must be AND or OR can execute with inverted index."); + "Compound operator must be AND or OR or Not can execute with inverted index."); } return Status::OK(); } diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index c96bfb707902d7..691faf41cb9a21 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -140,13 +140,13 @@ Status VectorizedFnCall::eval_inverted_index( VExprContext* context, const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const { DCHECK_GE(get_num_children(), 1); if (get_child(0)->is_slot_ref()) { auto* column_slot_ref = 
assert_cast(get_child(0).get()); - if (auto iter = colId_invertedIndexIter_mapping.find(column_slot_ref->column_id()); - iter != colId_invertedIndexIter_mapping.end()) { + if (auto iter = colId_to_inverted_index_iter.find(column_slot_ref->column_id()); + iter != colId_to_inverted_index_iter.end()) { const auto& pair = iter->second; return _function->eval_inverted_index(context->fn_context(_fn_context_index), pair.first, pair.second, num_rows, bitmap); diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index 27e84c0343a40a..f7887b5256a3bd 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -55,7 +55,7 @@ class VectorizedFnCall : public VExpr { VExprContext* context, const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const override; Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; Status open(RuntimeState* state, VExprContext* context, diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index f26acdf5bf5835..86615823af46b4 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -120,7 +120,7 @@ class VExpr { VExprContext* context, const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const { return Status::NotSupported("Not supported execute_with_inverted_index"); } diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index e51a8b54e87790..323df2e294a04e 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -122,9 +122,9 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr Status VExprContext::eval_inverted_indexs( const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, 
roaring::Roaring* bitmap) { - return _root->eval_inverted_index(this, colId_invertedIndexIter_mapping, num_rows, bitmap); + return _root->eval_inverted_index(this, colId_to_inverted_index_iter, num_rows, bitmap); } Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) { diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index aa48edad02967b..be0978a1f97542 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -74,7 +74,7 @@ class VExprContext { // but some situation although column b has indexes, but apply index is not useful, we should // skip this expr, just do not apply index anymore. /** - * @param colId_invertedIndexIter_mapping contains all column id to inverted index iterator mapping from segmentIterator + * @param colId_to_inverted_index_iter contains all column id to inverted index iterator mapping from segmentIterator * @param num_rows number of rows in one segment. * @param bitmap roaring bitmap to store the result. 0 is present filed by index. * @return status not ok means execute failed. 
@@ -82,7 +82,7 @@ class VExprContext { [[nodiscard]] Status eval_inverted_indexs( const std::unordered_map>& - colId_invertedIndexIter_mapping, + colId_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap); [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, From 8774f475dbb2228cb2db6b84cab001ab20a16cf5 Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 17 Apr 2024 15:55:51 +0800 Subject: [PATCH 06/16] fix some comments: --- .../rowset/segment_v2/segment_iterator.cpp | 13 ++--- be/src/vec/exprs/vcompound_pred.h | 34 +++++------ be/src/vec/exprs/vectorized_fn_call.cpp | 6 +- be/src/vec/exprs/vectorized_fn_call.h | 2 +- be/src/vec/exprs/vexpr.h | 2 +- be/src/vec/exprs/vexpr_context.cpp | 4 +- be/src/vec/exprs/vexpr_context.h | 4 +- .../functions/array/function_array_index.h | 10 +--- be/test/util/roaring_bitmap_test.cpp | 58 +++++++++++++++++++ 9 files changed, 88 insertions(+), 45 deletions(-) create mode 100644 be/test/util/roaring_bitmap_test.cpp diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 85de6635842542..a6bd0fd3692976 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1244,14 +1244,11 @@ Status SegmentIterator::_apply_inverted_index() { if (_row_bitmap.isEmpty()) { break; } - roaring::Roaring bitmap = _row_bitmap; - const Status st = expr_ctx->eval_inverted_indexs(iter_map, num_rows(), &bitmap); - if (!st.ok()) { - LOG(WARNING) << "failed to evaluate index in expr" << expr_ctx->root()->debug_string() - << ", error msg: " << st; - } else { - // every single result of expr_ctx must be `and` collection relationship - _row_bitmap &= bitmap; + // every single result of expr_ctx must be `and` collection relationship + if (Status st = expr_ctx->eval_inverted_indexs(iter_map, num_rows(), &_row_bitmap); + !st.ok() && st.code() != ErrorCode::NOT_IMPLEMENTED_ERROR) { + 
LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" + << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); } } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 030ef95aedf24c..c988ce98c7fed2 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -53,16 +53,6 @@ class VCompoundPred : public VectorizedFnCall { const std::string& expr_name() const override { return _expr_name; } - bool is_all_ones(const roaring::Roaring& r) { - return r.contains(0); - for (roaring::RoaringSetBitForwardIterator i = r.begin(); i != r.end(); ++i) { - if (*i == 0) { - return false; - } - } - return true; - } - // 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted // 2. when meet 'and' conjunct, function with column b can not apply inverted index // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted @@ -71,19 +61,20 @@ class VCompoundPred : public VectorizedFnCall { VExprContext* context, const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const override { + std::shared_ptr res = std::make_shared(); if (_op == TExprOpcode::COMPOUND_OR) { for (auto child : _children) { std::shared_ptr child_roaring = std::make_shared(); - Status st = child->eval_inverted_index(context, colId_to_inverted_index_iter, + Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter, num_rows, child_roaring.get()); if (!st.ok()) { continue; } - *bitmap |= *child_roaring; - if (!child_roaring->isEmpty()) { + *res |= *child_roaring; + if (res->cardinality() == num_rows) { // means inverted index filter do not reduce any rows // the left expr no need to be extracted by inverted index, // and cur roaring is all rows which means this inverted index is not useful, @@ -91,30 +82,31 @@ class VCompoundPred : 
public VectorizedFnCall { return Status::OK(); } } + *bitmap &= *res; } else if (_op == TExprOpcode::COMPOUND_AND) { for (auto child : _children) { std::shared_ptr child_roaring = std::make_shared(); - Status st = child->eval_inverted_index(context, colId_to_inverted_index_iter, + Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter, num_rows, child_roaring.get()); if (!st.ok()) { continue; } - *bitmap &= *child_roaring; - if (child_roaring->isEmpty()) { + *res &= *child_roaring; + if (res->isEmpty()) { // the left expr no need to be extracted by inverted index, just return 0 rows // res bitmap will be zero return Status::OK(); } } + *bitmap &= *res; } else if (_op == TExprOpcode::COMPOUND_NOT) { - std::shared_ptr child_roaring = std::make_shared(); - Status st = _children[0]->eval_inverted_index(context, colId_to_inverted_index_iter, - num_rows, child_roaring.get()); + Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, + num_rows, res.get()); if (!st.ok()) { return st; } - *bitmap -= *child_roaring; + *bitmap -= *res; } else { return Status::InternalError( "Compound operator must be AND or OR or Not can execute with inverted index."); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index c9ca5d759b557d..37c93993fd3eb5 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -140,13 +140,13 @@ Status VectorizedFnCall::eval_inverted_index( VExprContext* context, const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const { DCHECK_GE(get_num_children(), 1); if (get_child(0)->is_slot_ref()) { auto* column_slot_ref = assert_cast(get_child(0).get()); - if (auto iter = colId_to_inverted_index_iter.find(column_slot_ref->column_id()); - iter != colId_to_inverted_index_iter.end()) { + if (auto iter = 
colid_to_inverted_index_iter.find(column_slot_ref->column_id()); + iter != colid_to_inverted_index_iter.end()) { const auto& pair = iter->second; return _function->eval_inverted_index(context->fn_context(_fn_context_index), pair.first, pair.second, num_rows, bitmap); diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h index f7887b5256a3bd..a3ce85bb588f9a 100644 --- a/be/src/vec/exprs/vectorized_fn_call.h +++ b/be/src/vec/exprs/vectorized_fn_call.h @@ -55,7 +55,7 @@ class VectorizedFnCall : public VExpr { VExprContext* context, const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const override; Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override; Status open(RuntimeState* state, VExprContext* context, diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 26f5a2496a0baf..733d618b075e59 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -120,7 +120,7 @@ class VExpr { VExprContext* context, const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) const { return Status::NotSupported("Not supported execute_with_inverted_index"); } diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 323df2e294a04e..d0822cf2c3aa98 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -122,9 +122,9 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr Status VExprContext::eval_inverted_indexs( const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap) { - return _root->eval_inverted_index(this, colId_to_inverted_index_iter, num_rows, bitmap); + return _root->eval_inverted_index(this, colid_to_inverted_index_iter, num_rows, bitmap); 
} Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) { diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index be0978a1f97542..7a879f8d437dc7 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -74,7 +74,7 @@ class VExprContext { // but some situation although column b has indexes, but apply index is not useful, we should // skip this expr, just do not apply index anymore. /** - * @param colId_to_inverted_index_iter contains all column id to inverted index iterator mapping from segmentIterator + * @param colid_to_inverted_index_iter contains all column id to inverted index iterator mapping from segmentIterator * @param num_rows number of rows in one segment. * @param bitmap roaring bitmap to store the result. 0 is present filed by index. * @return status not ok means execute failed. @@ -82,7 +82,7 @@ class VExprContext { [[nodiscard]] Status eval_inverted_indexs( const std::unordered_map>& - colId_to_inverted_index_iter, + colid_to_inverted_index_iter, uint32_t num_rows, roaring::Roaring* bitmap); [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index aa9c3550e3523e..3244a8caeb0b4e 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -76,7 +76,7 @@ struct ArrayCountEqual { struct ParamValue { PrimitiveType type; - Field query_value; + Field value; }; template @@ -107,11 +107,7 @@ class FunctionArrayIndex : public IFunction { std::shared_ptr state = std::make_shared(); Field field; context->get_constant_col(1)->column_ptr->get(0, field); - if (field.is_null()) { - return Status::InternalError("field is null"); - } else { - state->query_value = field; - } + state->value = field; state->type = context->get_arg_type(1)->type; 
context->set_function_state(scope, state); return Status::OK(); @@ -130,7 +126,7 @@ class FunctionArrayIndex : public IFunction { std::unique_ptr query_param = nullptr; RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - param_value->type, ¶m_value->query_value, query_param)); + param_value->type, ¶m_value->value, query_param)); RETURN_IF_ERROR(iter->read_from_inverted_index( data_type_with_name.first, query_param->get_value(), segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, num_rows, roaring)); diff --git a/be/test/util/roaring_bitmap_test.cpp b/be/test/util/roaring_bitmap_test.cpp new file mode 100644 index 00000000000000..157e02ca2f5d45 --- /dev/null +++ b/be/test/util/roaring_bitmap_test.cpp @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include "gtest/gtest_pred_impl.h" +#include "roaring/roaring.hh" + +namespace doris { + +TEST(RaringBitmapTest, IsAllOne) { + std::shared_ptr bitmap = std::make_shared(); + bitmap->addRange(0, 1024); + EXPECT_TRUE(bitmap->contains(1)); + EXPECT_FALSE(bitmap->contains(1025)); + EXPECT_EQ(bitmap->cardinality(), 1024); + + std::shared_ptr bitmap2 = std::make_shared(); + bitmap2->addRange(26, 31); + // and + *bitmap &= *bitmap2; + EXPECT_TRUE(bitmap->contains(26)); + EXPECT_FALSE(bitmap->contains(25)); + EXPECT_EQ(bitmap->cardinality(), 5); + + // or + std::shared_ptr bitmap3 = std::make_shared(); + bitmap3->addRange(0, 1024); + *bitmap |= *bitmap3; + EXPECT_TRUE(bitmap->contains(1)); + EXPECT_TRUE(bitmap->contains(31)); + EXPECT_FALSE(bitmap->contains(1025)); + + // not + std::shared_ptr bitmap4 = std::make_shared(); + bitmap4->addRange(32, 2048); + *bitmap -= *bitmap4; + EXPECT_EQ(0, bitmap->minimum()); + EXPECT_EQ(bitmap->maximum(), 31); + EXPECT_EQ(bitmap->cardinality(), 32); +} + +} // namespace doris \ No newline at end of file From 8931ec3be205a518ccd65d86d6056ec1a67cec1b Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 17 Apr 2024 22:29:09 +0800 Subject: [PATCH 07/16] fix some write bugs and add cases --- .../rowset/segment_v2/inverted_index_reader.h | 6 -- .../segment_v2/inverted_index_writer.cpp | 2 + .../rowset/segment_v2/segment_iterator.cpp | 7 +- be/src/vec/exprs/vcompound_pred.h | 11 ++- be/src/vec/exprs/vectorized_fn_call.cpp | 3 + .../functions/array/function_array_index.h | 14 ++-- ...est_array_contains_with_inverted_index.out | 40 ++++++++++ ..._array_contains_with_inverted_index.groovy | 78 +++++++++++++++++++ 8 files changed, 141 insertions(+), 20 deletions(-) create mode 100644 regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out create mode 100644 regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy diff --git 
a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 96888147577cd9..b41c3a9333ad56 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -332,14 +332,8 @@ class InvertedIndexQueryParamFactory { return create_query_value(value, result_param); case PrimitiveType::TYPE_VARCHAR: return create_query_value(value, result_param); - case PrimitiveType::TYPE_HLL: - return create_query_value(value, result_param); case PrimitiveType::TYPE_STRING: return create_query_value(value, result_param); - case PrimitiveType::TYPE_IPV4: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_IPV6: - return create_query_value(value, result_param); default: LOG(FATAL) << "Unsupported primitive type for inverted index reader : " << primitiveType; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index b77d5d6b324a32..2247f254810a7e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -405,6 +405,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { RETURN_IF_ERROR(add_document()); _doc->clear(); _CLDELETE(ts); + } else { + RETURN_IF_ERROR(add_null_document()); } _rid++; } diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index a6bd0fd3692976..5709316c03db80 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1244,11 +1244,14 @@ Status SegmentIterator::_apply_inverted_index() { if (_row_bitmap.isEmpty()) { break; } - // every single result of expr_ctx must be `and` collection relationship - if (Status st = expr_ctx->eval_inverted_indexs(iter_map, num_rows(), &_row_bitmap); + std::shared_ptr result_bitmap = 
std::make_shared(); + if (Status st = expr_ctx->eval_inverted_indexs(iter_map, num_rows(), result_bitmap.get()); !st.ok() && st.code() != ErrorCode::NOT_IMPLEMENTED_ERROR) { LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); + } else { + // every single result of expr_ctx must be `and` collection relationship + _row_bitmap &= *result_bitmap; } } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index c988ce98c7fed2..ad28c2eb2313b3 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -71,7 +71,8 @@ class VCompoundPred : public VectorizedFnCall { Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter, num_rows, child_roaring.get()); if (!st.ok()) { - continue; + bitmap->addRange(0, num_rows); + return st; } *res |= *child_roaring; if (res->cardinality() == num_rows) { @@ -82,7 +83,7 @@ class VCompoundPred : public VectorizedFnCall { return Status::OK(); } } - *bitmap &= *res; + *bitmap = *res; } else if (_op == TExprOpcode::COMPOUND_AND) { for (auto child : _children) { std::shared_ptr child_roaring = @@ -99,14 +100,16 @@ class VCompoundPred : public VectorizedFnCall { return Status::OK(); } } - *bitmap &= *res; + *bitmap = *res; } else if (_op == TExprOpcode::COMPOUND_NOT) { Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, num_rows, res.get()); if (!st.ok()) { return st; } - *bitmap -= *res; + std::shared_ptr all_rows = std::make_shared(); + all_rows->addRange(0, num_rows); + *bitmap = *all_rows - *res; } else { return Status::InternalError( "Compound operator must be AND or OR or Not can execute with inverted index."); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 37c93993fd3eb5..40ec80a7046485 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ 
-150,6 +150,9 @@ Status VectorizedFnCall::eval_inverted_index( const auto& pair = iter->second; return _function->eval_inverted_index(context->fn_context(_fn_context_index), pair.first, pair.second, num_rows, bitmap); + } else { + return Status::InternalError("column id ", column_slot_ref->column_id(), + " not found in colid_to_inverted_index_iter"); } } else { return Status::InternalError("we can only eval inverted index for slot ref expr, but got ", diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h index 3244a8caeb0b4e..b2438dd0db8c77 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -134,16 +134,14 @@ class FunctionArrayIndex : public IFunction { // mask out null_bitmap, since NULL cmp VALUE will produce NULL // and be treated as false in WHERE // keep it after query, since query will try to read null_bitmap and put it to cache - if (iter->has_null()) { - segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; - RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); - std::shared_ptr null_bitmap = null_bitmap_cache_handle.get_bitmap(); - if (null_bitmap) { - *bitmap -= *null_bitmap; - } + segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; + RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); + std::shared_ptr null_bitmap = null_bitmap_cache_handle.get_bitmap(); + if (null_bitmap) { + *bitmap -= *null_bitmap; } - *bitmap &= *roaring; + *bitmap = *roaring; return Status::OK(); } diff --git a/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out new file mode 100644 index 00000000000000..6e7d0ee0ad1d03 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out @@ -0,0 +1,40 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +16 + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] + +-- !sql -- + +-- !sql -- +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] + +-- !sql -- +2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] +2017-01-01 48a33ec3453a28bce84b8f96fe161956 ["m"] +2017-01-01 6afef581285b6608bf80d5a4e46cf839 ["a", "b", "c"] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a3 \N +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a4 \N +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a5 [] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a6 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a7 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] +2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] +2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] + +-- !sql -- +2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] +2017-01-01 48a33ec3453a28bce84b8f96fe161956 ["m"] +2017-01-01 6afef581285b6608bf80d5a4e46cf839 ["a", "b", "c"] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a5 [] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a6 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a7 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] +2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] +2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 0974e7a82e30d1af83205e474fadd0a2 ["w"] +2019-01-01 26823b3995ee38bd145ddd910b2f6300 ["x"] +2019-01-01 a9fb5c985c90bf05f3bee5ca3ae95260 ["u", "v"] +2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] + diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy new file mode 100644 index 00000000000000..c0fa9779c4f2fb --- /dev/null +++ 
b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_array_contains_with_inverted_index"){ + // prepare test table + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def indexTblName = "tai" + + // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true + sql """ set enable_common_expr_pushdown = true; """ + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE `${indexTblName}` ( + `apply_date` date NULL COMMENT '', + `id` varchar(60) NOT NULL COMMENT '', + `inventors` array NULL COMMENT '', + INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`apply_date`, `id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ INSERT INTO `${indexTblName}`(`apply_date`, 
`id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\", \"r\", \"s\", \"t\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, 
`inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """ + sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """ + + qt_sql """ select count() from ${indexTblName}""" + order_qt_sql """ select * from tai where array_contains(inventors, 's'); """ + + order_qt_sql """ select * from tai where array_contains(inventors, 's') and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where array_contains(inventors, 's') and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select * from tai where array_contains(inventors, 's') or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(inventors, 's') order by id; """ + +} \ No newline at end of file From e1192fceb433df130fad51b75f067f81982d7817 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 18 Apr 2024 15:50:57 +0800 Subject: [PATCH 08/16] fixed and add more cases --- .../rowset/segment_v2/segment_iterator.cpp | 2 +- be/src/vec/exprs/vcompound_pred.h | 10 +++-- be/src/vec/exprs/vexpr_context.cpp | 2 +- be/src/vec/exprs/vexpr_context.h | 2 +- ...est_array_contains_with_inverted_index.out | 42 +++++++++++++++++++ ..._array_contains_with_inverted_index.groovy | 5 ++- 6 files changed, 56 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 5709316c03db80..01012ffde0d27a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1245,7 +1245,7 @@ Status SegmentIterator::_apply_inverted_index() { break; } std::shared_ptr result_bitmap = std::make_shared(); - if (Status st = expr_ctx->eval_inverted_indexs(iter_map, 
num_rows(), result_bitmap.get()); + if (Status st = expr_ctx->eval_inverted_index(iter_map, num_rows(), result_bitmap.get()); !st.ok() && st.code() != ErrorCode::NOT_IMPLEMENTED_ERROR) { LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index ad28c2eb2313b3..bdb6f07fda581d 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -85,15 +85,19 @@ class VCompoundPred : public VectorizedFnCall { } *bitmap = *res; } else if (_op == TExprOpcode::COMPOUND_AND) { - for (auto child : _children) { + for (int i = 0; i < _children.size(); ++i) { std::shared_ptr child_roaring = std::make_shared(); - Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter, + Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, num_rows, child_roaring.get()); if (!st.ok()) { continue; } - *res &= *child_roaring; + if (i == 0) { + *res = *child_roaring; + } else { + *res &= *child_roaring; + } if (res->isEmpty()) { // the left expr no need to be extracted by inverted index, just return 0 rows // res bitmap will be zero diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index d0822cf2c3aa98..c2a45180aac0af 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -119,7 +119,7 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal()); return _fn_contexts.size() - 1; } -Status VExprContext::eval_inverted_indexs( +Status VExprContext::eval_inverted_index( const std::unordered_map>& colid_to_inverted_index_iter, diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 7a879f8d437dc7..fc4862ef6c1ea6 100644 --- 
a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -79,7 +79,7 @@ class VExprContext { * @param bitmap roaring bitmap to store the result. 0 is present filed by index. * @return status not ok means execute failed. */ - [[nodiscard]] Status eval_inverted_indexs( + [[nodiscard]] Status eval_inverted_index( const std::unordered_map>& colid_to_inverted_index_iter, diff --git a/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out index 6e7d0ee0ad1d03..76a72d8c595b01 100644 --- a/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out +++ b/regression-test/data/inverted_index_p0/test_array_contains_with_inverted_index.out @@ -22,11 +22,46 @@ 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] 2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] 2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] + +-- !sql -- +2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] +2017-01-01 48a33ec3453a28bce84b8f96fe161956 ["m"] +2017-01-01 6afef581285b6608bf80d5a4e46cf839 ["a", "b", "c"] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a5 [] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a6 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a7 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] +2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] +2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] +2019-01-01 0974e7a82e30d1af83205e474fadd0a2 ["w"] +2019-01-01 26823b3995ee38bd145ddd910b2f6300 ["x"] +2019-01-01 a9fb5c985c90bf05f3bee5ca3ae95260 ["u", "v"] +2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] + +-- !sql -- +2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] +2017-01-01 48a33ec3453a28bce84b8f96fe161956 ["m"] +2017-01-01 6afef581285b6608bf80d5a4e46cf839 ["a", "b", "c"] 
+2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a5 [] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a6 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a7 [null, null, null] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a8 [] +2017-01-01 9fcb57ae675f0af4d613d9e6c0e8a2a2 ["o"] +2017-01-01 d93d942d985a8fb7547c72dada8d332d ["d", "e", "f", "g", "h", "i", "j", "k", "l"] + +-- !sql -- +2019-01-01 0974e7a82e30d1af83205e474fadd0a2 ["w"] +2019-01-01 26823b3995ee38bd145ddd910b2f6300 ["x"] +2019-01-01 a9fb5c985c90bf05f3bee5ca3ae95260 ["u", "v"] +2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] -- !sql -- 2017-01-01 021603e7dcfe65d44af0efd0e5aee154 ["n"] 2017-01-01 48a33ec3453a28bce84b8f96fe161956 ["m"] 2017-01-01 6afef581285b6608bf80d5a4e46cf839 ["a", "b", "c"] +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a3 \N +2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a4 \N 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a5 [] 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a6 [null, null, null] 2017-01-01 8fcb57ae675f0af4d613d9e6c0e8a2a7 [null, null, null] @@ -38,3 +73,10 @@ 2019-01-01 a9fb5c985c90bf05f3bee5ca3ae95260 ["u", "v"] 2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] +-- !sql -- +2019-01-01 0974e7a82e30d1af83205e474fadd0a2 ["w"] +2019-01-01 26823b3995ee38bd145ddd910b2f6300 ["x"] +2019-01-01 a648a447b8f71522f11632eba4b4adde ["p", "q", "r", "s", "t"] +2019-01-01 a9fb5c985c90bf05f3bee5ca3ae95260 ["u", "v"] +2019-01-01 ee27ee1da291e46403c408e220bed6e1 ["y"] + diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy index c0fa9779c4f2fb..f2100fe4ae375c 100644 --- a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy @@ -74,5 +74,8 @@ suite("test_array_contains_with_inverted_index"){ order_qt_sql """ select * from tai where 
array_contains(inventors, 's') and apply_date = '2019-01-01' order by id; """ order_qt_sql """ select * from tai where array_contains(inventors, 's') or apply_date = '2017-01-01' order by id; """ order_qt_sql """ select * from tai where !array_contains(inventors, 's') order by id; """ - + order_qt_sql """ select * from tai where !array_contains(inventors, 's') and apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(inventors, 's') and apply_date = '2019-01-01' order by id; """ + order_qt_sql """ select * from tai where !array_contains(inventors, 's') or apply_date = '2017-01-01' order by id; """ + order_qt_sql """ select * from tai where (array_contains(inventors, 's') and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ } \ No newline at end of file From 3a5ab22ce109541b32f28b1f194fe20c2152b3f9 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 18 Apr 2024 15:57:07 +0800 Subject: [PATCH 09/16] fix code format --- be/src/vec/exprs/vcompound_pred.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index bdb6f07fda581d..a8ae948b7c754b 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -89,7 +89,7 @@ class VCompoundPred : public VectorizedFnCall { std::shared_ptr child_roaring = std::make_shared(); Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, - num_rows, child_roaring.get()); + num_rows, child_roaring.get()); if (!st.ok()) { continue; } From d48a1de18b8bbccec9595127d750d5e7c6c03ecc Mon Sep 17 00:00:00 2001 From: amorynan Date: Fri, 19 Apr 2024 17:47:35 +0800 Subject: [PATCH 10/16] fix array index test --- be/src/vec/functions/array/function_array_index.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/be/src/vec/functions/array/function_array_index.h b/be/src/vec/functions/array/function_array_index.h 
index b2438dd0db8c77..5221d4f51f3ed5 100644 --- a/be/src/vec/functions/array/function_array_index.h +++ b/be/src/vec/functions/array/function_array_index.h @@ -106,10 +106,12 @@ class FunctionArrayIndex : public IFunction { // now we only support same std::shared_ptr state = std::make_shared(); Field field; - context->get_constant_col(1)->column_ptr->get(0, field); - state->value = field; - state->type = context->get_arg_type(1)->type; - context->set_function_state(scope, state); + if (context->get_constant_col(1)) { + context->get_constant_col(1)->column_ptr->get(0, field); + state->value = field; + state->type = context->get_arg_type(1)->type; + context->set_function_state(scope, state); + } return Status::OK(); } From 1c2126a1d714c6197c22bcd6bfde322e20c2020b Mon Sep 17 00:00:00 2001 From: amorynan Date: Wed, 24 Apr 2024 18:24:09 +0800 Subject: [PATCH 11/16] fix segment iterator --- .../rowset/segment_v2/segment_iterator.cpp | 24 ++++++++++++++++--- .../olap/rowset/segment_v2/segment_iterator.h | 3 +++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 01012ffde0d27a..6305db81542623 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -362,10 +362,10 @@ Status SegmentIterator::_lazy_init() { _segment->_tablet_schema->cluster_key_idxes().empty()) { RETURN_IF_ERROR(_get_row_ranges_by_keys()); } - _is_common_expr_column.resize(_schema->columns().size(), false); + // extract for index apply col id which is slot_ref if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { for (auto expr : _remaining_conjunct_roots) { - RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + RETURN_IF_ERROR(_extract_common_expr_columns_for_index(expr)); } } RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); @@ -732,6 +732,20 @@ Status 
SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr return Status::OK(); } +Status SegmentIterator::_extract_common_expr_columns_for_index(const vectorized::VExprSPtr& expr) { + auto& children = expr->children(); + for (int i = 0; i < children.size(); ++i) { + RETURN_IF_ERROR(_extract_common_expr_columns_for_index(children[i])); + } + + auto node_type = expr->node_type(); + if (node_type == TExprNodeType::SLOT_REF) { + auto slot_expr = std::dynamic_pointer_cast(expr); + _common_expr_columns_for_index.insert(_schema->column_id(slot_expr->column_id())); + } + return Status::OK(); +} + Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( const vectorized::VExprSPtr& expr) { if (expr == nullptr) { @@ -1231,7 +1245,7 @@ Status SegmentIterator::_apply_inverted_index() { std::unordered_map> iter_map; - for (auto col_id : _common_expr_columns) { + for (auto col_id : _common_expr_columns_for_index) { if (_check_apply_by_inverted_index(col_id)) { iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], _inverted_index_iterators[col_id].get()); @@ -1627,7 +1641,11 @@ Status SegmentIterator::_vec_init_lazy_materialization() { } // Step2: extract columns that can execute expr context + _is_common_expr_column.resize(_schema->columns().size(), false); if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { + for (auto expr : _remaining_conjunct_roots) { + RETURN_IF_ERROR(_extract_common_expr_columns(expr)); + } if (!_common_expr_columns.empty()) { _is_need_expr_eval = true; for (auto cid : _schema->column_ids()) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index cc39c606ca2368..c01141509b553e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -274,6 +274,8 @@ class SegmentIterator : public RowwiseIterator { bool _can_evaluated_by_vectorized(ColumnPredicate* predicate); 
[[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr); + // same with _extract_common_expr_columns, but only extract columns that can be used for index + [[nodiscard]] Status _extract_common_expr_columns_for_index(const vectorized::VExprSPtr& expr); [[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size, vectorized::Block* block); uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size, @@ -410,6 +412,7 @@ class SegmentIterator : public RowwiseIterator { // columns to read after predicate evaluation and remaining expr execute std::vector _non_predicate_columns; std::set _common_expr_columns; + std::set _common_expr_columns_for_index; // remember the rowids we've read for the current row block. // could be a local variable of next_batch(), kept here to reuse vector memory std::vector _block_rowids; From c499a8489373e793d813d5511a30eb7e799b75d7 Mon Sep 17 00:00:00 2001 From: amorynan Date: Thu, 25 Apr 2024 07:54:58 +0800 Subject: [PATCH 12/16] fix IFunction error --- be/src/vec/functions/function.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 8370449a28eca4..da42a628036337 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -404,12 +404,13 @@ class IFunction : public std::enable_shared_from_this, return Status::OK(); } + // here are lots of function not extends eval_inverted_index. 
Status eval_inverted_index(FunctionContext* context, const vectorized::NameAndTypePair& data_type_with_name, segment_v2::InvertedIndexIterator* iter, uint32_t num_rows, roaring::Roaring* bitmap) const override { - LOG(FATAL) << "eval_inverted_index is not implemented for IFunction"; - __builtin_unreachable(); + return Status::NotSupported("eval_inverted_index is not supported in function: ", + get_name()); } [[noreturn]] const DataTypes& get_argument_types() const final { From 20a8745c2a76b4375c2d31c12959e3ea5f5f6219 Mon Sep 17 00:00:00 2001 From: amorynan Date: Fri, 26 Apr 2024 17:57:02 +0800 Subject: [PATCH 13/16] add switch for inverted index --- .../rowset/segment_v2/segment_iterator.cpp | 60 ++++++++++++------- be/src/runtime/runtime_state.h | 6 ++ be/src/vec/exprs/vcompound_pred.h | 2 +- be/src/vec/exprs/vectorized_fn_call.cpp | 4 +- .../org/apache/doris/qe/SessionVariable.java | 19 ++++++ gensrc/thrift/PaloInternalService.thrift | 4 +- ..._array_contains_with_inverted_index.groovy | 1 + 7 files changed, 69 insertions(+), 27 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 6305db81542623..40d945984049b1 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -309,6 +309,8 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _remaining_conjunct_roots = opts.remaining_conjunct_roots; _common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down; _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty(); + _enable_common_expr_pushdown = + _opts.runtime_state->enable_common_expr_pushdown_for_inverted_index(); _column_predicate_info.reset(new ColumnPredicateInfo()); for (auto& expr : _remaining_conjunct_roots) { @@ -1241,31 +1243,43 @@ Status SegmentIterator::_apply_inverted_index() { } } - // support expr to evaluate inverted index - std::unordered_map> - iter_map; + // add a switch 
for inverted index filter + if (_opts.runtime_state && + _opts.runtime_state->enable_common_expr_pushdown_for_inverted_index()) { + // support expr to evaluate inverted index + std::unordered_map> + iter_map; - for (auto col_id : _common_expr_columns_for_index) { - if (_check_apply_by_inverted_index(col_id)) { - iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], - _inverted_index_iterators[col_id].get()); - } - } - for (auto expr_ctx : _common_expr_ctxs_push_down) { - // _inverted_index_iterators has all column ids which has inverted index - // _common_expr_columns has all column ids from _common_expr_ctxs_push_down - // if current bitmap is already empty just return - if (_row_bitmap.isEmpty()) { - break; + for (auto col_id : _common_expr_columns_for_index) { + if (_check_apply_by_inverted_index(col_id)) { + iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], + _inverted_index_iterators[col_id].get()); + } } - std::shared_ptr result_bitmap = std::make_shared(); - if (Status st = expr_ctx->eval_inverted_index(iter_map, num_rows(), result_bitmap.get()); - !st.ok() && st.code() != ErrorCode::NOT_IMPLEMENTED_ERROR) { - LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" - << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); - } else { - // every single result of expr_ctx must be `and` collection relationship - _row_bitmap &= *result_bitmap; + for (auto expr_ctx : _common_expr_ctxs_push_down) { + // _inverted_index_iterators has all column ids which has inverted index + // _common_expr_columns has all column ids from _common_expr_ctxs_push_down + // if current bitmap is already empty just return + if (_row_bitmap.isEmpty()) { + break; + } + std::shared_ptr result_bitmap = std::make_shared(); + if (Status st = + expr_ctx->eval_inverted_index(iter_map, num_rows(), result_bitmap.get()); + !st.ok()) { + if (_downgrade_without_index(st) || st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { + continue; + } else { + 
// other code is not to be handled, we should just break + LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" + << expr_ctx->root()->debug_string() + << ", error msg: " << st.to_string(); + break; + } + } else { + // every single result of expr_ctx must be `and` collection relationship + _row_bitmap &= *result_bitmap; + } } } diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 644db3e32eb211..f3bc64b98a32bd 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -178,6 +178,12 @@ class RuntimeState { _query_options.enable_common_expr_pushdown; } + bool enable_common_expr_pushdown_for_inverted_index() const { + return enable_common_expr_pushdown() && + _query_options.__isset.enable_common_expr_pushdown_for_inverted_index && + _query_options.enable_common_expr_pushdown_for_inverted_index; + }; + bool enable_faster_float_convert() const { return _query_options.__isset.faster_float_convert && _query_options.faster_float_convert; } diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index a8ae948b7c754b..6ac8dd6d8f0e87 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -115,7 +115,7 @@ class VCompoundPred : public VectorizedFnCall { all_rows->addRange(0, num_rows); *bitmap = *all_rows - *res; } else { - return Status::InternalError( + return Status::NotSupported( "Compound operator must be AND or OR or Not can execute with inverted index."); } return Status::OK(); diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 40ec80a7046485..de700dd132c82b 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -155,8 +155,8 @@ Status VectorizedFnCall::eval_inverted_index( " not found in colid_to_inverted_index_iter"); } } else { - return Status::InternalError("we can only eval inverted index for slot ref expr, but got ", - 
get_child(0)->expr_name()); + return Status::NotSupported("we can only eval inverted index for slot ref expr, but got ", + get_child(0)->expr_name()); } return Status::OK(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index a5706622221228..5862c04c709c19 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -383,6 +383,9 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_INVERTED_INDEX_QUERY = "enable_inverted_index_query"; + public static final String ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX + = "enable_common_expr_pushdown_for_inverted_index"; + public static final String ENABLE_PUSHDOWN_COUNT_ON_INDEX = "enable_count_on_index_pushdown"; public static final String GROUP_BY_AND_HAVING_USE_ALIAS_FIRST = "group_by_and_having_use_alias_first"; @@ -1351,6 +1354,11 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { "是否启用inverted index query。", "Set whether to use inverted index query."}) public boolean enableInvertedIndexQuery = true; + // Whether enable query expr with inverted index. + @VariableMgr.VarAttr(name = ENABLE_COMMON_EXPR_PUSHDOWN_FOR_INVERTED_INDEX, needForward = true, description = { + "是否启用表达式上使用 inverted index。", "Set whether to use inverted index query for expr."}) + public boolean enableCommonExpPushDownForInvertedIndex = false; + // Whether enable pushdown count agg to scan node when using inverted index match. 
@VariableMgr.VarAttr(name = ENABLE_PUSHDOWN_COUNT_ON_INDEX, needForward = true, description = { "是否启用count_on_index pushdown。", "Set whether to pushdown count_on_index."}) @@ -2962,6 +2970,16 @@ public void setEnableInvertedIndexQuery(boolean enableInvertedIndexQuery) { this.enableInvertedIndexQuery = enableInvertedIndexQuery; } + + public boolean isEnableCommonExprPushdownForInvertedIndex() { + return enableCommonExpPushDownForInvertedIndex; + } + + + public void setEnableCommonExprPushdownForInvertedIndex(boolean enableCommonExpPushDownForInvertedIndex) { + this.enableCommonExpPushDownForInvertedIndex = enableCommonExpPushDownForInvertedIndex; + } + public boolean isEnablePushDownCountOnIndex() { return enablePushDownCountOnIndex; } @@ -3105,6 +3123,7 @@ public TQueryOptions toThrift() { tResult.setFileCacheBasePath(fileCacheBasePath); tResult.setEnableInvertedIndexQuery(enableInvertedIndexQuery); + tResult.setEnableCommonExprPushdownForInvertedIndex(enableCommonExpPushDownForInvertedIndex); if (dryRunQuery) { tResult.setDryRunQuery(true); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index b71ddfa21a36eb..99722b015dbf88 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -285,7 +285,9 @@ struct TQueryOptions { 104: optional i64 min_revocable_mem = 0 105: optional i64 spill_streaming_agg_mem_limit = 0; - + // expr pushdown for index filter rows + 106: optional bool enable_common_expr_pushdown_for_inverted_index = false; + // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false } diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy index f2100fe4ae375c..e7fecdc77d253a 100644 --- 
a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy @@ -27,6 +27,7 @@ suite("test_array_contains_with_inverted_index"){ // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true sql """ set enable_common_expr_pushdown = true; """ + sql """ set enable_common_expr_pushdown_for_inverted_index = true; """ sql "DROP TABLE IF EXISTS ${indexTblName}" // create 1 replica table From 2333daff77e29eca7014592bc67168a50545d173 Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 28 Apr 2024 10:25:28 +0800 Subject: [PATCH 14/16] fix columnid for schema id for iterator --- .../segment_v2/inverted_index_reader.cpp | 73 +++++++------------ .../rowset/segment_v2/inverted_index_reader.h | 64 +++++++--------- .../rowset/segment_v2/segment_iterator.cpp | 14 ++-- be/src/vec/exprs/vectorized_fn_call.cpp | 4 +- 4 files changed, 60 insertions(+), 95 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index dee7ae5fb43977..51145989bd7904 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -86,52 +86,33 @@ Status InvertedIndexQueryParamFactory::create_query_value( return Status::OK(); }; -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, 
std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template 
Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); -template Status InvertedIndexQueryParamFactory::create_query_value( - const void* value, std::unique_ptr& result_param); +#define CREATE_QUERY_VALUE_TEMPLATE(PT) \ + template Status InvertedIndexQueryParamFactory::create_query_value( \ + const void* value, std::unique_ptr& result_param); + +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BOOLEAN) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_TINYINT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_SMALLINT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_INT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BIGINT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_LARGEINT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_FLOAT) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DOUBLE) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_VARCHAR) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATE) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATEV2) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIME) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIMEV2) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_CHAR) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMALV2) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL32) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL64) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL128I) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL256) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_HLL) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_STRING) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV4) +CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV6) std::unique_ptr InvertedIndexReader::create_analyzer( InvertedIndexCtx* inverted_index_ctx) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index b41c3a9333ad56..8bcb910f763b4f 100644 --- 
a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -298,46 +298,32 @@ class InvertedIndexQueryParamFactory { const PrimitiveType& primitiveType, const void* value, std::unique_ptr& result_param) { switch (primitiveType) { - case PrimitiveType::TYPE_BOOLEAN: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_TINYINT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_SMALLINT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_INT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_BIGINT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_LARGEINT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_FLOAT: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DOUBLE: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DECIMALV2: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DECIMAL32: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DECIMAL64: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DECIMAL128I: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DECIMAL256: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DATE: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_DATETIME: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_CHAR: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_VARCHAR: - return create_query_value(value, result_param); - case PrimitiveType::TYPE_STRING: - return create_query_value(value, result_param); +#define M(TYPE) \ + case TYPE: { \ + return create_query_value(value, result_param); \ + } + M(PrimitiveType::TYPE_BOOLEAN) + M(PrimitiveType::TYPE_TINYINT) + 
M(PrimitiveType::TYPE_SMALLINT) + M(PrimitiveType::TYPE_INT) + M(PrimitiveType::TYPE_BIGINT) + M(PrimitiveType::TYPE_LARGEINT) + M(PrimitiveType::TYPE_FLOAT) + M(PrimitiveType::TYPE_DOUBLE) + M(PrimitiveType::TYPE_DECIMALV2) + M(PrimitiveType::TYPE_DECIMAL32) + M(PrimitiveType::TYPE_DECIMAL64) + M(PrimitiveType::TYPE_DECIMAL128I) + M(PrimitiveType::TYPE_DECIMAL256) + M(PrimitiveType::TYPE_DATE) + M(PrimitiveType::TYPE_DATETIME) + M(PrimitiveType::TYPE_CHAR) + M(PrimitiveType::TYPE_VARCHAR) + M(PrimitiveType::TYPE_STRING) +#undef M default: - LOG(FATAL) << "Unsupported primitive type for inverted index reader : " - << primitiveType; - return Status::NotSupported("Unsupported primitive type for inverted index reader"); + return Status::NotSupported("Unsupported primitive type {} for inverted index reader", + primitiveType); } }; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 40d945984049b1..78a05d5f73aed0 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -309,8 +309,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { _remaining_conjunct_roots = opts.remaining_conjunct_roots; _common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down; _enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty(); - _enable_common_expr_pushdown = - _opts.runtime_state->enable_common_expr_pushdown_for_inverted_index(); _column_predicate_info.reset(new ColumnPredicateInfo()); for (auto& expr : _remaining_conjunct_roots) { @@ -743,7 +741,7 @@ Status SegmentIterator::_extract_common_expr_columns_for_index(const vectorized: auto node_type = expr->node_type(); if (node_type == TExprNodeType::SLOT_REF) { auto slot_expr = std::dynamic_pointer_cast(expr); - _common_expr_columns_for_index.insert(_schema->column_id(slot_expr->column_id())); + _common_expr_columns_for_index.insert(slot_expr->column_id()); } return 
Status::OK(); } @@ -1249,11 +1247,11 @@ Status SegmentIterator::_apply_inverted_index() { // support expr to evaluate inverted index std::unordered_map> iter_map; - for (auto col_id : _common_expr_columns_for_index) { - if (_check_apply_by_inverted_index(col_id)) { - iter_map[col_id] = std::make_pair(_storage_name_and_type[col_id], - _inverted_index_iterators[col_id].get()); + auto tablet_col_id = _schema->column_id(col_id); + if (_check_apply_by_inverted_index(tablet_col_id)) { + iter_map[col_id] = std::make_pair(_storage_name_and_type[tablet_col_id], + _inverted_index_iterators[tablet_col_id].get()); } } for (auto expr_ctx : _common_expr_ctxs_push_down) { @@ -1271,7 +1269,7 @@ Status SegmentIterator::_apply_inverted_index() { continue; } else { // other code is not to be handled, we should just break - LOG(WARNING) << "failed to evaluate inverted index for expr_ctx" + LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: " << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); break; diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index de700dd132c82b..b98947a4b772bb 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -151,8 +151,8 @@ Status VectorizedFnCall::eval_inverted_index( return _function->eval_inverted_index(context->fn_context(_fn_context_index), pair.first, pair.second, num_rows, bitmap); } else { - return Status::InternalError("column id ", column_slot_ref->column_id(), - " not found in colid_to_inverted_index_iter"); + return Status::InternalError("column id {} not found in colid_to_inverted_index_iter", + column_slot_ref->column_id()); } } else { return Status::NotSupported("we can only eval inverted index for slot ref expr, but got ", From 56b4e88bd84d23bdba38f3db5d34d3115a6196de Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 28 Apr 2024 11:36:24 +0800 Subject: [PATCH 15/16] fixed --- 
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 +- be/src/vec/exprs/vectorized_fn_call.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index de3add9fb460a1..3adef84e7dda55 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1272,7 +1272,7 @@ Status SegmentIterator::_apply_inverted_index() { LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: " << expr_ctx->root()->debug_string() << ", error msg: " << st.to_string(); - break; + return st; } } else { // every single result of expr_ctx must be `and` collection relationship diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 58fc3c1c334b69..732d99d0f0ec93 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -156,8 +156,8 @@ Status VectorizedFnCall::eval_inverted_index( return _function->eval_inverted_index(context->fn_context(_fn_context_index), pair.first, pair.second, num_rows, bitmap); } else { - return Status::InternalError("column id {} not found in colid_to_inverted_index_iter", - column_slot_ref->column_id()); + return Status::NotSupported("column id {} not found in colid_to_inverted_index_iter", + column_slot_ref->column_id()); } } else { return Status::NotSupported("we can only eval inverted index for slot ref expr, but got ", From e083aef64a1ad2b48ddb69adb5221e60fd8a0a2e Mon Sep 17 00:00:00 2001 From: amorynan Date: Sun, 28 Apr 2024 23:41:03 +0800 Subject: [PATCH 16/16] fix or logic --- be/src/vec/exprs/vcompound_pred.h | 5 +++-- .../test_array_contains_with_inverted_index.groovy | 14 +++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 6ac8dd6d8f0e87..4ff628548692d3 100644 --- 
a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -74,14 +74,15 @@ class VCompoundPred : public VectorizedFnCall { bitmap->addRange(0, num_rows); return st; } - *res |= *child_roaring; - if (res->cardinality() == num_rows) { + if (child_roaring->cardinality() == 0) { // means inverted index filter do not reduce any rows // the left expr no need to be extracted by inverted index, // and cur roaring is all rows which means this inverted index is not useful, // do not need to calculate with res bitmap + bitmap->addRange(0, num_rows); return Status::OK(); } + *res |= *child_roaring; } *bitmap = *res; } else if (_op == TExprOpcode::COMPOUND_AND) { diff --git a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy index e7fecdc77d253a..0ea18c784d6340 100644 --- a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy @@ -17,22 +17,18 @@ suite("test_array_contains_with_inverted_index"){ // prepare test table - - def timeout = 60000 - def delta_time = 1000 - def alter_res = "null" - def useTime = 0 - def indexTblName = "tai" // If we use common expr pass to inverted index , we should set enable_common_expr_pushdown = true sql """ set enable_common_expr_pushdown = true; """ sql """ set enable_common_expr_pushdown_for_inverted_index = true; """ + sql """ set enable_pipeline_x_engine = true;""" + sql """ set enable_profile = true;""" sql "DROP TABLE IF EXISTS ${indexTblName}" // create 1 replica table sql """ - CREATE TABLE `${indexTblName}` ( + CREATE TABLE IF NOT EXISTS `${indexTblName}` ( `apply_date` date NULL COMMENT '', `id` varchar(60) NOT NULL COMMENT '', `inventors` array NULL COMMENT '', @@ -69,7 +65,7 @@ suite("test_array_contains_with_inverted_index"){ sql """ INSERT INTO 
`${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """ qt_sql """ select count() from ${indexTblName}""" - order_qt_sql """ select * from tai where array_contains(inventors, 's'); """ + order_qt_sql """ select * from tai where array_contains(inventors, 's') order by id; """ order_qt_sql """ select * from tai where array_contains(inventors, 's') and apply_date = '2017-01-01' order by id; """ order_qt_sql """ select * from tai where array_contains(inventors, 's') and apply_date = '2019-01-01' order by id; """ @@ -79,4 +75,4 @@ suite("test_array_contains_with_inverted_index"){ order_qt_sql """ select * from tai where !array_contains(inventors, 's') and apply_date = '2019-01-01' order by id; """ order_qt_sql """ select * from tai where !array_contains(inventors, 's') or apply_date = '2017-01-01' order by id; """ order_qt_sql """ select * from tai where (array_contains(inventors, 's') and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """ -} \ No newline at end of file +}