From 93c7510dd2a162dc9663c8284485ba5de3f06188 Mon Sep 17 00:00:00 2001 From: zenoyang Date: Mon, 16 May 2022 12:20:15 +0800 Subject: [PATCH 1/5] [fix](storage) low_cardinality_optimize core dump when is null predicate --- be/src/vec/columns/column_dictionary.h | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 5a51afbeea6d04..53e16b6a39c1b9 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -98,12 +98,16 @@ class ColumnDictionary final : public COWHelper> { } void insert_data(const char* pos, size_t /*length*/) override { - _codes.push_back(unaligned_load(pos)); + if (pos == nullptr) { + _codes.push_back(_dict.get_null_code()); + return; + } + LOG(FATAL) << "insert_data not supported in ColumnDictionary"; } - void insert_data(const T value) { _codes.push_back(value); } - - void insert_default() override { _codes.push_back(T()); } + void insert_default() override { + _codes.push_back(_dict.get_null_code()); + } void clear() override { _codes.clear(); @@ -219,13 +223,13 @@ class ColumnDictionary final : public COWHelper> { void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict_array, size_t data_num, uint32_t dict_num) override { - if (!is_dict_inited()) { - _dict.reserve(dict_num); + if (_dict.empty()) { + _dict.reserve(dict_num + 1); for (uint32_t i = 0; i < dict_num; ++i) { auto value = StringValue(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } - _dict_inited = true; + _dict.insert_null_value(); // make the last dict value is null value } char* end_ptr = (char*)_codes.get_end_ptr(); @@ -263,8 +267,6 @@ class ColumnDictionary final : public COWHelper> { return _dict.find_codes(values); } - bool is_dict_inited() const { return _dict_inited; } - bool is_dict_sorted() const { return _dict_sorted; } bool is_dict_code_converted() const { return _dict_code_converted; } @@ -296,6 +298,12 @@ class ColumnDictionary final : public COWHelper> { _inverted_index[value] = _inverted_index.size(); } + void insert_null_value() { + auto value = StringValue(); + _dict_data.push_back_without_reserve(value); + _inverted_index[value] = _inverted_index.size(); + } + int32_t find_code(const StringValue& value) const { auto it = _inverted_index.find(value); if (it != _inverted_index.end()) { @@ -304,10 +312,14 @@ class ColumnDictionary final : public COWHelper> { return -1; } + T get_null_code() { + return _dict_data.size() - 1; // The last dict value is null value + } + inline StringValue& get_value(T code) { return _dict_data[code]; } inline void generate_hash_values() { - if (_hash_values.size() == 0) { + if (_hash_values.empty()) { _hash_values.resize(_dict_data.size()); for (size_t i = 0; i < _dict_data.size(); i++) { auto& sv = _dict_data[i]; @@ -380,6 +392,8 @@ class ColumnDictionary final : public COWHelper> { size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); } + bool empty() { return _dict_data.empty(); } + private: StringValue::Comparator _comparator; // dict code -> dict value @@ -398,7 +412,6 @@ class ColumnDictionary final : public COWHelper> { private: size_t _reserve_size; - bool _dict_inited = false; bool _dict_sorted = false; bool _dict_code_converted = false; Dictionary _dict; From dd6f2e0448ba4fe3a1976c28bce8d51eae341984 Mon Sep 17 00:00:00 2001 From: zenoyang Date: Mon, 16 May 2022 12:34:39 +0800 Subject: [PATCH 2/5] f --- be/src/vec/columns/column_dictionary.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 53e16b6a39c1b9..f1e76715d2f6e3 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -105,9 +105,7 @@ class ColumnDictionary final : public COWHelper> { LOG(FATAL) << "insert_data not supported in ColumnDictionary"; } - void insert_default() override { - _codes.push_back(_dict.get_null_code()); - } + void insert_default() override { _codes.push_back(_dict.get_null_code()); } void clear() override { _codes.clear(); @@ -229,7 +227,7 @@ class ColumnDictionary final : public COWHelper> { auto value = StringValue(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } - _dict.insert_null_value(); // make the last dict value is null value + _dict.insert_null_value(); // make the last dict value is null value } char* end_ptr = (char*)_codes.get_end_ptr(); From 199b9f253add3a714ab59a14986bce3896aaed72 Mon Sep 17 00:00:00 2001 From: zenoyang Date: Mon, 16 May 2022 15:53:00 +0800 Subject: [PATCH 3/5] fix --- be/src/vec/columns/column_dictionary.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index f1e76715d2f6e3..b0795846715a5e 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -98,6 +98,7 @@ class ColumnDictionary final : public COWHelper> { } void insert_data(const char* pos, size_t /*length*/) override { + DCHECK(pos == nullptr); if (pos == nullptr) { _codes.push_back(_dict.get_null_code()); return; @@ -222,12 +223,11 @@ class ColumnDictionary final : public COWHelper> { const StringRef* dict_array, size_t data_num, uint32_t dict_num) override { if (_dict.empty()) { - _dict.reserve(dict_num + 1); + _dict.reserve(dict_num); for (uint32_t i = 0; i < dict_num; ++i) { auto value = StringValue(dict_array[i].data, dict_array[i].size); _dict.insert_value(value); } - _dict.insert_null_value(); // make the last dict value is null value } char* end_ptr = (char*)_codes.get_end_ptr(); @@ -296,12 +296,6 @@ class ColumnDictionary final : public COWHelper> { _inverted_index[value] = _inverted_index.size(); } - void insert_null_value() { - auto value = StringValue(); - _dict_data.push_back_without_reserve(value); - _inverted_index[value] = _inverted_index.size(); - } - int32_t find_code(const StringValue& value) const { auto it = _inverted_index.find(value); if (it != _inverted_index.end()) { @@ -311,10 +305,12 @@ class ColumnDictionary final : public COWHelper> { } T get_null_code() { - return _dict_data.size() - 1; // The last dict value is null value + return _dict_data.size(); // Make null code greater than the index of the dict } - inline StringValue& get_value(T code) { return _dict_data[code]; } + inline StringValue& get_value(T code) { + return code >= _dict_data.size() ? _null_value : _dict_data[code]; + } inline void generate_hash_values() { if (_hash_values.empty()) { @@ -393,6 +389,7 @@ class ColumnDictionary final : public COWHelper> { bool empty() { return _dict_data.empty(); } private: + StringValue _null_value = StringValue(); StringValue::Comparator _comparator; // dict code -> dict value DictContainer _dict_data; From 2aa61e89201d160f8e8d9d728cb7157337e846bf Mon Sep 17 00:00:00 2001 From: zenoyang Date: Mon, 16 May 2022 21:32:17 +0800 Subject: [PATCH 4/5] fix2 --- be/src/olap/comparison_predicate.cpp | 7 +++++-- be/src/vec/columns/column_dictionary.h | 5 ----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index d76a2bb506f4b1..44eaa4ba233dc8 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -144,6 +144,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) // todo(zeno) define interface in IColumn to simplify code +// If 1 OP 0 returns true, it means the predicate is > or >= +// If 1 OP 1 returns true, it means the predicate is >= or <= +// by this way, avoid redundant code #define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \ template \ void CLASS::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ @@ -161,7 +164,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) vectorized::ColumnDictionary>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ auto dict_code = \ - IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : nested_col_ptr->find_code(_value); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ @@ -190,7 +193,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) reinterpret_cast&>( \ column); \ auto& data_array = dict_col.get_data(); \ - auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \ : dict_col.find_code(_value); \ for (uint16_t i = 0; i < *size; ++i) { \ uint16_t idx = sel[i]; \ diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index b0795846715a5e..7bc10921615fe0 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -98,11 +98,6 @@ class ColumnDictionary final : public COWHelper> { } void insert_data(const char* pos, size_t /*length*/) override { - DCHECK(pos == nullptr); - if (pos == nullptr) { - _codes.push_back(_dict.get_null_code()); - return; - } LOG(FATAL) << "insert_data not supported in ColumnDictionary"; } From c9da60e8c203572848c88bd3d3173d2858535e8d Mon Sep 17 00:00:00 2001 From: zenoyang Date: Tue, 17 May 2022 12:12:25 +0800 Subject: [PATCH 5/5] fix3 --- be/src/vec/columns/column_dictionary.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 7bc10921615fe0..f3d07a6ad35709 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -296,12 +296,10 @@ class ColumnDictionary final : public COWHelper> { if (it != _inverted_index.end()) { return it->second; } - return -1; + return -2; // -1 is null code } - T get_null_code() { - return _dict_data.size(); // Make null code greater than the index of the dict - } + T get_null_code() { return -1; } inline StringValue& get_value(T code) { return code >= _dict_data.size() ? _null_value : _dict_data[code]; @@ -408,9 +406,6 @@ class ColumnDictionary final : public COWHelper> { Container _codes; }; -template class ColumnDictionary; -template class ColumnDictionary; -template class ColumnDictionary; template class ColumnDictionary; using ColumnDictI32 = vectorized::ColumnDictionary;