-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[Improve](expr)first support array_contains for expr push down inverted index #32620
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
0744cb3
ba783e8
5bfddbd
f6612b6
515a84d
038af1b
16084f0
a6cca1f
8774f47
8931ec3
e1192fc
3a5ab22
d48a1de
1c2126a
c499a84
20a8745
a6a9770
2333daf
7b5260b
56b4e88
e083aef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -362,6 +362,12 @@ Status SegmentIterator::_lazy_init() { | |
| _segment->_tablet_schema->cluster_key_idxes().empty()) { | ||
| RETURN_IF_ERROR(_get_row_ranges_by_keys()); | ||
| } | ||
| // extract for index apply col id which is slot_ref | ||
| if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) { | ||
| for (auto expr : _remaining_conjunct_roots) { | ||
| RETURN_IF_ERROR(_extract_common_expr_columns_for_index(expr)); | ||
| } | ||
| } | ||
| RETURN_IF_ERROR(_get_row_ranges_by_column_conditions()); | ||
| RETURN_IF_ERROR(_vec_init_lazy_materialization()); | ||
| // Remove rows that have been marked deleted | ||
|
|
@@ -726,6 +732,20 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr | |
| return Status::OK(); | ||
| } | ||
|
|
||
| Status SegmentIterator::_extract_common_expr_columns_for_index(const vectorized::VExprSPtr& expr) { | ||
| auto& children = expr->children(); | ||
| for (int i = 0; i < children.size(); ++i) { | ||
| RETURN_IF_ERROR(_extract_common_expr_columns_for_index(children[i])); | ||
| } | ||
|
|
||
| auto node_type = expr->node_type(); | ||
| if (node_type == TExprNodeType::SLOT_REF) { | ||
| auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr); | ||
| _common_expr_columns_for_index.insert(slot_expr->column_id()); | ||
| } | ||
| return Status::OK(); | ||
| } | ||
|
|
||
| Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode( | ||
| const vectorized::VExprSPtr& expr) { | ||
| if (expr == nullptr) { | ||
|
|
@@ -815,6 +835,17 @@ bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() { | |
| return true; | ||
| } | ||
|
|
||
| bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) { | ||
This comment was marked as abuse.
Sorry, something went wrong.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. warning: method '_check_apply_by_inverted_index' can be made static [readability-convert-member-functions-to-static] be/src/olap/rowset/segment_v2/segment_iterator.h:288: - bool _check_apply_by_inverted_index(ColumnId col_id);
+ static bool _check_apply_by_inverted_index(ColumnId col_id);
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { | ||
| return false; | ||
| } | ||
| if (_inverted_index_iterators[col_id] == nullptr) { | ||
| //this column without inverted index | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) { | ||
| if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) { | ||
| return false; | ||
|
|
@@ -1210,6 +1241,46 @@ Status SegmentIterator::_apply_inverted_index() { | |
| } | ||
| } | ||
|
|
||
| // add a switch for inverted index filter | ||
| if (_opts.runtime_state && | ||
| _opts.runtime_state->enable_common_expr_pushdown_for_inverted_index()) { | ||
| // support expr to evaluate inverted index | ||
| std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, InvertedIndexIterator*>> | ||
| iter_map; | ||
| for (auto col_id : _common_expr_columns_for_index) { | ||
| auto tablet_col_id = _schema->column_id(col_id); | ||
| if (_check_apply_by_inverted_index(tablet_col_id)) { | ||
| iter_map[col_id] = std::make_pair(_storage_name_and_type[tablet_col_id], | ||
| _inverted_index_iterators[tablet_col_id].get()); | ||
| } | ||
| } | ||
| for (auto expr_ctx : _common_expr_ctxs_push_down) { | ||
| // _inverted_index_iterators has all column ids which has inverted index | ||
| // _common_expr_columns has all column ids from _common_expr_ctxs_push_down | ||
| // if current bitmap is already empty just return | ||
| if (_row_bitmap.isEmpty()) { | ||
| break; | ||
| } | ||
| std::shared_ptr<roaring::Roaring> result_bitmap = std::make_shared<roaring::Roaring>(); | ||
| if (Status st = | ||
| expr_ctx->eval_inverted_index(iter_map, num_rows(), result_bitmap.get()); | ||
| !st.ok()) { | ||
| if (_downgrade_without_index(st) || st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { | ||
| continue; | ||
| } else { | ||
| // other code is not to be handled, we should just break | ||
| LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: " | ||
| << expr_ctx->root()->debug_string() | ||
| << ", error msg: " << st.to_string(); | ||
| return st; | ||
| } | ||
| } else { | ||
| // every single result of expr_ctx must be `and` collection relationship | ||
| _row_bitmap &= *result_bitmap; | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After the expr has been evaluated against the index, will it be evaluated again later in the common expr processing phase?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, so far it is; maybe later @yiguolei will make some optimization. |
||
| _col_predicates = std::move(remaining_predicates); | ||
| _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality()); | ||
| return Status::OK(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall { | |||||||||
|
|
||||||||||
| const std::string& expr_name() const override { return _expr_name; } | ||||||||||
|
|
||||||||||
| // 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted | ||||||||||
| // 2. when meet 'and' conjunct, function with column b can not apply inverted index | ||||||||||
| // eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted | ||||||||||
| // but a and array_contains(b, 1), b can be applied inverted index, which b can be extracted | ||||||||||
| Status eval_inverted_index( | ||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. warning: method 'eval_inverted_index' can be made static [readability-convert-member-functions-to-static]
Suggested change
be/src/vec/exprs/vcompound_pred.h:74: - uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ uint32_t num_rows, roaring::Roaring* bitmap) override {
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. warning: method 'eval_inverted_index' can be made static [readability-convert-member-functions-to-static]
Suggested change
be/src/vec/exprs/vcompound_pred.h:64: - uint32_t num_rows, roaring::Roaring* bitmap) const override {
+ uint32_t num_rows, roaring::Roaring* bitmap) override { |
||||||||||
| VExprContext* context, | ||||||||||
| const std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, | ||||||||||
| segment_v2::InvertedIndexIterator*>>& | ||||||||||
| colid_to_inverted_index_iter, | ||||||||||
| uint32_t num_rows, roaring::Roaring* bitmap) const override { | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| std::shared_ptr<roaring::Roaring> res = std::make_shared<roaring::Roaring>(); | ||||||||||
| if (_op == TExprOpcode::COMPOUND_OR) { | ||||||||||
| for (auto child : _children) { | ||||||||||
| std::shared_ptr<roaring::Roaring> child_roaring = | ||||||||||
| std::make_shared<roaring::Roaring>(); | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter, | ||||||||||
| num_rows, child_roaring.get()); | ||||||||||
| if (!st.ok()) { | ||||||||||
eldenmoon marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| bitmap->addRange(0, num_rows); | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| return st; | ||||||||||
| } | ||||||||||
| if (child_roaring->cardinality() == 0) { | ||||||||||
| // means inverted index filter do not reduce any rows | ||||||||||
| // the left expr no need to be extracted by inverted index, | ||||||||||
| // and cur roaring is all rows which means this inverted index is not useful, | ||||||||||
| // do not need to calculate with res bitmap | ||||||||||
| bitmap->addRange(0, num_rows); | ||||||||||
| return Status::OK(); | ||||||||||
| } | ||||||||||
| *res |= *child_roaring; | ||||||||||
| } | ||||||||||
| *bitmap = *res; | ||||||||||
| } else if (_op == TExprOpcode::COMPOUND_AND) { | ||||||||||
| for (int i = 0; i < _children.size(); ++i) { | ||||||||||
| std::shared_ptr<roaring::Roaring> child_roaring = | ||||||||||
| std::make_shared<roaring::Roaring>(); | ||||||||||
| Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| num_rows, child_roaring.get()); | ||||||||||
| if (!st.ok()) { | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| continue; | ||||||||||
| } | ||||||||||
| if (i == 0) { | ||||||||||
| *res = *child_roaring; | ||||||||||
| } else { | ||||||||||
| *res &= *child_roaring; | ||||||||||
| } | ||||||||||
| if (res->isEmpty()) { | ||||||||||
| // the left expr no need to be extracted by inverted index, just return 0 rows | ||||||||||
| // res bitmap will be zero | ||||||||||
| return Status::OK(); | ||||||||||
| } | ||||||||||
| } | ||||||||||
| *bitmap = *res; | ||||||||||
| } else if (_op == TExprOpcode::COMPOUND_NOT) { | ||||||||||
| Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter, | ||||||||||
| num_rows, res.get()); | ||||||||||
| if (!st.ok()) { | ||||||||||
| return st; | ||||||||||
| } | ||||||||||
| std::shared_ptr<roaring::Roaring> all_rows = std::make_shared<roaring::Roaring>(); | ||||||||||
| all_rows->addRange(0, num_rows); | ||||||||||
| *bitmap = *all_rows - *res; | ||||||||||
| } else { | ||||||||||
| return Status::NotSupported( | ||||||||||
| "Compound operator must be AND or OR or Not can execute with inverted index."); | ||||||||||
amorynan marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||
| } | ||||||||||
| return Status::OK(); | ||||||||||
| } | ||||||||||
|
|
||||||||||
| Status execute(VExprContext* context, Block* block, int* result_column_id) override { | ||||||||||
| if (children().size() == 1 || !_all_child_is_compound_and_not_const()) { | ||||||||||
| return VectorizedFnCall::execute(context, block, result_column_id); | ||||||||||
|
|
||||||||||
Uh oh!
There was an error while loading. Please reload this page.