Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,11 @@ class ComparisonPredicateBase : public ColumnPredicate {

std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();

auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(_value);
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, roaring));
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(
InvertedIndexQueryParamFactory::create_query_value<Type>(&_value, query_param));
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, query_param->get_value(),
query_type, num_rows, roaring));

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
// and be treated as false in WHERE
Expand Down
11 changes: 7 additions & 4 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,15 @@ class InListPredicateBase : public ColumnPredicate {
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
const void* ptr = iter->get_value();
auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(
*reinterpret_cast<const T*>(ptr));
// auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(
// *reinterpret_cast<const T*>(ptr));
std::unique_ptr<InvertedIndexQueryParamFactory> query_param = nullptr;
RETURN_IF_ERROR(
InvertedIndexQueryParamFactory::create_query_value<Type>(ptr, query_param));
InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, index));
RETURN_IF_ERROR(iterator->read_from_inverted_index(
column_name, query_param->get_value(), query_type, num_rows, index));
indices |= *index;
iter->next();
}
Expand Down
41 changes: 41 additions & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,47 @@

namespace doris::segment_v2 {

template <PrimitiveType PT>
Status InvertedIndexQueryParamFactory::create_query_value(
const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
using CPP_TYPE = typename PrimitiveTypeTraits<PT>::CppType;
std::unique_ptr<InvertedIndexQueryParam<PT>> param =
InvertedIndexQueryParam<PT>::create_unique();
auto&& storage_val = PrimitiveTypeConvertor<PT>::to_storage_field_type(
*reinterpret_cast<const CPP_TYPE*>(value));
param->set_value(&storage_val);
result_param = std::move(param);
return Status::OK();
};

#define CREATE_QUERY_VALUE_TEMPLATE(PT) \
template Status InvertedIndexQueryParamFactory::create_query_value<PT>( \
const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);

CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BOOLEAN)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_TINYINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_SMALLINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_INT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BIGINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_LARGEINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_FLOAT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DOUBLE)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_VARCHAR)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATE)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATEV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIME)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIMEV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_CHAR)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMALV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL32)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL64)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL128I)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL256)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_HLL)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_STRING)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV4)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV6)

std::unique_ptr<lucene::analysis::Analyzer> InvertedIndexReader::create_analyzer(
InvertedIndexCtx* inverted_index_ctx) {
std::unique_ptr<lucene::analysis::Analyzer> analyzer;
Expand Down
74 changes: 74 additions & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/rowset/segment_v2/inverted_index_query_type.h"
#include "olap/tablet_schema.h"
#include "runtime/primitive_type.h"
#include "util/once.h"

#define FINALIZE_INPUT(x) \
Expand Down Expand Up @@ -279,6 +280,79 @@ class BkdIndexReader : public InvertedIndexReader {
const KeyCoder* _value_key_coder {};
};

/**
* @brief InvertedIndexQueryParamFactory is a factory class to create QueryValue object.
* we need a template function to make predict class like in_list_predict template class to use.
* also need a function with primitive type parameter to create inverted index query value. like some function expr: function_array_index
* Now we just mapping field value in query engine to storage field value
*/
class InvertedIndexQueryParamFactory {
ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory);

public:
virtual ~InvertedIndexQueryParamFactory() = default;

template <PrimitiveType PT>
static Status create_query_value(const void* value,
std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);

static Status create_query_value(
const PrimitiveType& primitiveType, const void* value,
std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
switch (primitiveType) {
#define M(TYPE) \
case TYPE: { \
return create_query_value<TYPE>(value, result_param); \
}
M(PrimitiveType::TYPE_BOOLEAN)
M(PrimitiveType::TYPE_TINYINT)
M(PrimitiveType::TYPE_SMALLINT)
M(PrimitiveType::TYPE_INT)
M(PrimitiveType::TYPE_BIGINT)
M(PrimitiveType::TYPE_LARGEINT)
M(PrimitiveType::TYPE_FLOAT)
M(PrimitiveType::TYPE_DOUBLE)
M(PrimitiveType::TYPE_DECIMALV2)
M(PrimitiveType::TYPE_DECIMAL32)
M(PrimitiveType::TYPE_DECIMAL64)
M(PrimitiveType::TYPE_DECIMAL128I)
M(PrimitiveType::TYPE_DECIMAL256)
M(PrimitiveType::TYPE_DATE)
M(PrimitiveType::TYPE_DATETIME)
M(PrimitiveType::TYPE_CHAR)
M(PrimitiveType::TYPE_VARCHAR)
M(PrimitiveType::TYPE_STRING)
#undef M
default:
return Status::NotSupported("Unsupported primitive type {} for inverted index reader",
primitiveType);
}
};

virtual const void* get_value() const {
LOG_FATAL(
"Execution reached an undefined behavior code path in "
"InvertedIndexQueryParamFactory");
__builtin_unreachable();
};
};

template <PrimitiveType PT>
class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory {
ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam);
using storage_val = typename PrimitiveTypeTraits<PT>::StorageFieldType;

public:
void set_value(const storage_val* value) {
_value = *reinterpret_cast<const storage_val*>(value);
}

const void* get_value() const override { return &_value; }

private:
storage_val _value;
};

class InvertedIndexIterator {
ENABLE_FACTORY_CREATOR(InvertedIndexIterator);

Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,8 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
RETURN_IF_ERROR(add_document());
_doc->clear();
_CLDELETE(ts);
} else {
RETURN_IF_ERROR(add_null_document());
}
_rid++;
}
Expand Down
71 changes: 71 additions & 0 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,12 @@ Status SegmentIterator::_lazy_init() {
_segment->_tablet_schema->cluster_key_idxes().empty()) {
RETURN_IF_ERROR(_get_row_ranges_by_keys());
}
// extract for index apply col id which is slot_ref
if (_enable_common_expr_pushdown && !_remaining_conjunct_roots.empty()) {
for (auto expr : _remaining_conjunct_roots) {
RETURN_IF_ERROR(_extract_common_expr_columns_for_index(expr));
}
}
RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
RETURN_IF_ERROR(_vec_init_lazy_materialization());
// Remove rows that have been marked deleted
Expand Down Expand Up @@ -726,6 +732,20 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr
return Status::OK();
}

Status SegmentIterator::_extract_common_expr_columns_for_index(const vectorized::VExprSPtr& expr) {
auto& children = expr->children();
for (int i = 0; i < children.size(); ++i) {
RETURN_IF_ERROR(_extract_common_expr_columns_for_index(children[i]));
}

auto node_type = expr->node_type();
if (node_type == TExprNodeType::SLOT_REF) {
auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
_common_expr_columns_for_index.insert(slot_expr->column_id());
}
return Status::OK();
}

Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
const vectorized::VExprSPtr& expr) {
if (expr == nullptr) {
Expand Down Expand Up @@ -815,6 +835,17 @@ bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
return true;
}

bool SegmentIterator::_check_apply_by_inverted_index(ColumnId col_id) {

This comment was marked as abuse.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method '_check_apply_by_inverted_index' can be made static [readability-convert-member-functions-to-static]

be/src/olap/rowset/segment_v2/segment_iterator.h:288:

-     bool _check_apply_by_inverted_index(ColumnId col_id);
+     static bool _check_apply_by_inverted_index(ColumnId col_id);

if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
}
if (_inverted_index_iterators[col_id] == nullptr) {
//this column without inverted index
return false;
}
return true;
}

bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) {
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
Expand Down Expand Up @@ -1210,6 +1241,46 @@ Status SegmentIterator::_apply_inverted_index() {
}
}

// add a switch for inverted index filter
if (_opts.runtime_state &&
_opts.runtime_state->enable_common_expr_pushdown_for_inverted_index()) {
// support expr to evaluate inverted index
std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair, InvertedIndexIterator*>>
iter_map;
for (auto col_id : _common_expr_columns_for_index) {
auto tablet_col_id = _schema->column_id(col_id);
if (_check_apply_by_inverted_index(tablet_col_id)) {
iter_map[col_id] = std::make_pair(_storage_name_and_type[tablet_col_id],
_inverted_index_iterators[tablet_col_id].get());
}
}
for (auto expr_ctx : _common_expr_ctxs_push_down) {
// _inverted_index_iterators has all column ids which has inverted index
// _common_expr_columns has all column ids from _common_expr_ctxs_push_down
// if current bitmap is already empty just return
if (_row_bitmap.isEmpty()) {
break;
}
std::shared_ptr<roaring::Roaring> result_bitmap = std::make_shared<roaring::Roaring>();
if (Status st =
expr_ctx->eval_inverted_index(iter_map, num_rows(), result_bitmap.get());
!st.ok()) {
if (_downgrade_without_index(st) || st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) {
continue;
} else {
// other code is not to be handled, we should just break
LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: "
<< expr_ctx->root()->debug_string()
<< ", error msg: " << st.to_string();
return st;
}
} else {
// every single result of expr_ctx must be `and` collection relationship
_row_bitmap &= *result_bitmap;
}
}
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

after expr was evaluated in index , it will be evaluated again later in common expr processing phase?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes , so far it is , maybe later @yiguolei will make some optimization

_col_predicates = std::move(remaining_predicates);
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
return Status::OK();
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/rowset/segment_v2/segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ class SegmentIterator : public RowwiseIterator {
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);

[[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr);
// same with _extract_common_expr_columns, but only extract columns that can be used for index
[[nodiscard]] Status _extract_common_expr_columns_for_index(const vectorized::VExprSPtr& expr);
[[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size,
vectorized::Block* block);
uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size,
Expand All @@ -284,6 +286,7 @@ class SegmentIterator : public RowwiseIterator {

void _convert_dict_code_for_predicate_if_necessary_impl(ColumnPredicate* predicate);

bool _check_apply_by_inverted_index(ColumnId col_id);
bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);

std::string _gen_predicate_result_sign(ColumnPredicate* predicate);
Expand Down Expand Up @@ -409,6 +412,7 @@ class SegmentIterator : public RowwiseIterator {
// columns to read after predicate evaluation and remaining expr execute
std::vector<ColumnId> _non_predicate_columns;
std::set<ColumnId> _common_expr_columns;
std::set<ColumnId> _common_expr_columns_for_index;
// remember the rowids we've read for the current row block.
// could be a local variable of next_batch(), kept here to reuse vector memory
std::vector<rowid_t> _block_rowids;
Expand Down
6 changes: 6 additions & 0 deletions be/src/runtime/runtime_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,12 @@ class RuntimeState {
_query_options.enable_common_expr_pushdown;
}

bool enable_common_expr_pushdown_for_inverted_index() const {
return enable_common_expr_pushdown() &&
_query_options.__isset.enable_common_expr_pushdown_for_inverted_index &&
_query_options.enable_common_expr_pushdown_for_inverted_index;
};

bool enable_faster_float_convert() const {
return _query_options.__isset.faster_float_convert && _query_options.faster_float_convert;
}
Expand Down
69 changes: 69 additions & 0 deletions be/src/vec/exprs/vcompound_pred.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,75 @@ class VCompoundPred : public VectorizedFnCall {

const std::string& expr_name() const override { return _expr_name; }

// 1. when meet 'or' conjunct: a or b, if b can apply index, return all rows, so b should not be extracted
// 2. when meet 'and' conjunct, function with column b can not apply inverted index
// eg. a and hash(b)=1, if b can apply index, but hash(b)=1 is not for index, so b should not be extracted
// but a and array_contains(b, 1), b can be applied inverted index, which b can be extracted
Status eval_inverted_index(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'eval_inverted_index' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status eval_inverted_index(
static Status eval_inverted_index(

be/src/vec/exprs/vcompound_pred.h:74:

-             uint32_t num_rows, roaring::Roaring* bitmap) const override {
+             uint32_t num_rows, roaring::Roaring* bitmap) override {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'eval_inverted_index' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status eval_inverted_index(
static Status eval_inverted_index(

be/src/vec/exprs/vcompound_pred.h:64:

-             uint32_t num_rows, roaring::Roaring* bitmap) const override {
+             uint32_t num_rows, roaring::Roaring* bitmap) override {

VExprContext* context,
const std::unordered_map<ColumnId, std::pair<vectorized::NameAndTypePair,
segment_v2::InvertedIndexIterator*>>&
colid_to_inverted_index_iter,
uint32_t num_rows, roaring::Roaring* bitmap) const override {
std::shared_ptr<roaring::Roaring> res = std::make_shared<roaring::Roaring>();
if (_op == TExprOpcode::COMPOUND_OR) {
for (auto child : _children) {
std::shared_ptr<roaring::Roaring> child_roaring =
std::make_shared<roaring::Roaring>();
Status st = child->eval_inverted_index(context, colid_to_inverted_index_iter,
num_rows, child_roaring.get());
if (!st.ok()) {
bitmap->addRange(0, num_rows);
return st;
}
if (child_roaring->cardinality() == 0) {
// means inverted index filter do not reduce any rows
// the left expr no need to be extracted by inverted index,
// and cur roaring is all rows which means this inverted index is not useful,
// do not need to calculate with res bitmap
bitmap->addRange(0, num_rows);
return Status::OK();
}
*res |= *child_roaring;
}
*bitmap = *res;
} else if (_op == TExprOpcode::COMPOUND_AND) {
for (int i = 0; i < _children.size(); ++i) {
std::shared_ptr<roaring::Roaring> child_roaring =
std::make_shared<roaring::Roaring>();
Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter,
num_rows, child_roaring.get());
if (!st.ok()) {
continue;
}
if (i == 0) {
*res = *child_roaring;
} else {
*res &= *child_roaring;
}
if (res->isEmpty()) {
// the left expr no need to be extracted by inverted index, just return 0 rows
// res bitmap will be zero
return Status::OK();
}
}
*bitmap = *res;
} else if (_op == TExprOpcode::COMPOUND_NOT) {
Status st = _children[0]->eval_inverted_index(context, colid_to_inverted_index_iter,
num_rows, res.get());
if (!st.ok()) {
return st;
}
std::shared_ptr<roaring::Roaring> all_rows = std::make_shared<roaring::Roaring>();
all_rows->addRange(0, num_rows);
*bitmap = *all_rows - *res;
} else {
return Status::NotSupported(
"Compound operator must be AND or OR or Not can execute with inverted index.");
}
return Status::OK();
}

Status execute(VExprContext* context, Block* block, int* result_column_id) override {
if (children().size() == 1 || !_all_child_is_compound_and_not_const()) {
return VectorizedFnCall::execute(context, block, result_column_id);
Expand Down
Loading