diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index d76a2bb506f4b1..44eaa4ba233dc8 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -144,6 +144,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) // todo(zeno) define interface in IColumn to simplify code +// If 1 OP 0 returns true, it means the predicate is > or >= +// If 1 OP 1 returns true, it means the predicate is >= or <= +// by this way, avoid redundant code #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \ template \ void CLASS::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ @@ -161,7 +164,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) vectorized::ColumnDictionary>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ auto dict_code = \ - IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : nested_col_ptr->find_code(_value); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ @@ -190,7 +193,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) reinterpret_cast&>( \ column); \ auto& data_array = dict_col.get_data(); \ - auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : dict_col.find_code(_value); \ for (uint16_t i = 0; i < *size; ++i) { \ uint16_t idx = sel[i]; \ diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 5a51afbeea6d04..f3d07a6ad35709 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -98,12 +98,10 @@ class ColumnDictionary final : public COWHelper> { } void insert_data(const char* pos, size_t /*length*/) override { - _codes.push_back(unaligned_load(pos)); + LOG(FATAL) << "insert_data not supported in ColumnDictionary"; } - void insert_data(const T value) { _codes.push_back(value); } - - void insert_default() override { _codes.push_back(T()); } + void insert_default() override { _codes.push_back(_dict.get_null_code()); } void clear() override { _codes.clear(); @@ -219,13 +217,12 @@ class ColumnDictionary final : public COWHelper> { void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict_array, size_t data_num, uint32_t dict_num) override { - if (!is_dict_inited()) { + if (_dict.empty()) { _dict.reserve(dict_num); for (uint32_t i = 0; i < dict_num; ++i) { auto value = StringValue(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } - _dict_inited = true; } char* end_ptr = (char*)_codes.get_end_ptr(); @@ -263,8 +260,6 @@ class ColumnDictionary final : public COWHelper> { return _dict.find_codes(values); } - bool is_dict_inited() const { return _dict_inited; } - bool is_dict_sorted() const { return _dict_sorted; } bool is_dict_code_converted() const { return _dict_code_converted; } @@ -301,13 +296,17 @@ class ColumnDictionary final : public COWHelper> { if (it != _inverted_index.end()) { return it->second; } - return -1; + return -2; // -1 is null code } - inline StringValue& get_value(T code) { return _dict_data[code]; } + T get_null_code() { return -1; } + + inline StringValue& get_value(T code) { + return code >= _dict_data.size() ? _null_value : _dict_data[code]; + } inline void generate_hash_values() { - if (_hash_values.size() == 0) { + if (_hash_values.empty()) { _hash_values.resize(_dict_data.size()); for (size_t i = 0; i < _dict_data.size(); i++) { auto& sv = _dict_data[i]; @@ -380,7 +379,10 @@ class ColumnDictionary final : public COWHelper> { size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); } + bool empty() { return _dict_data.empty(); } + private: + StringValue _null_value = StringValue(); StringValue::Comparator _comparator; // dict code -> dict value DictContainer _dict_data; @@ -398,16 +400,12 @@ class ColumnDictionary final : public COWHelper> { private: size_t _reserve_size; - bool _dict_inited = false; bool _dict_sorted = false; bool _dict_code_converted = false; Dictionary _dict; Container _codes; }; -template class ColumnDictionary; -template class ColumnDictionary; -template class ColumnDictionary; template class ColumnDictionary; using ColumnDictI32 = vectorized::ColumnDictionary;