From 160e343309e35fb2d12b0c1855c2e3d48c6cf4cd Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 11 Jul 2024 10:59:23 +0800 Subject: [PATCH 1/2] [Refactor](Variant) make many interfaces exception safe --- be/src/vec/columns/column_object.cpp | 16 ++- be/src/vec/columns/column_object.h | 146 ++++++++++++++++++++------- 2 files changed, 119 insertions(+), 43 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 043c442e275c53..0b17ad5c0f8b7a 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -21,6 +21,7 @@ #include "vec/columns/column_object.h" #include +#include #include #include #include @@ -34,6 +35,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -677,8 +679,6 @@ void ColumnObject::check_consistency() const { } for (const auto& leaf : subcolumns) { if (num_rows != leaf->data.size()) { - // LOG(FATAL) << "unmatched column:" << leaf->path.get_path() - // << ", expeted rows:" << num_rows << ", but meet:" << leaf->data.size(); throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, "unmatched column: {}, expeted rows: {}, but meet: {}", leaf->path.get_path(), num_rows, leaf->data.size()); @@ -1552,9 +1552,11 @@ void ColumnObject::update_hash_with_value(size_t n, SipHash& hash) const { } for_each_imutable_subcolumn([&](const auto& subcolumn) { if (n >= subcolumn.size()) { - LOG(FATAL) << n << " greater than column size " << subcolumn.size() - << " sub_column_info:" << subcolumn.dump_structure() - << " total lines of this column " << num_rows; + std::stringstream ss; + ss << n << " greater than column size " << subcolumn.size() + << " sub_column_info:" << subcolumn.dump_structure() + << " total lines of this column " << num_rows; + throw doris::Exception(ErrorCode::INTERNAL_ERROR, ss.str()); } return subcolumn.update_hash_with_value(n, hash); }); @@ -1600,8 +1602,4 @@ Status 
ColumnObject::sanitize() const { return Status::OK(); } -void ColumnObject::replace_column_data(const IColumn& col, size_t row, size_t self_row) { - LOG(FATAL) << "Method replace_column_data is not supported for " << get_name(); -} - } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index f19d51796a8332..61146c6c0ef6a8 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -295,16 +295,6 @@ class ColumnObject final : public COWHelper { // return null if not found const Subcolumn* get_subcolumn(const PathInData& key, size_t index_hint) const; - /** More efficient methods of manipulation */ - [[noreturn]] IColumn& get_data() { - LOG(FATAL) << "Not implemented method get_data()"; - __builtin_unreachable(); - } - [[noreturn]] const IColumn& get_data() const { - LOG(FATAL) << "Not implemented method get_data()"; - __builtin_unreachable(); - } - // return null if not found Subcolumn* get_subcolumn(const PathInData& key); @@ -429,35 +419,13 @@ class ColumnObject final : public COWHelper { void get(size_t n, Field& res) const override; - /// All other methods throw exception. 
- StringRef get_data_at(size_t) const override { - LOG(FATAL) << "should not call the method in column object"; - return StringRef(); - } - Status try_insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end); - StringRef serialize_value_into_arena(size_t n, Arena& arena, - char const*& begin) const override { - LOG(FATAL) << "should not call the method in column object"; - return StringRef(); - } - void for_each_imutable_subcolumn(ImutableColumnCallback callback) const; - const char* deserialize_and_insert_from_arena(const char* pos) override { - LOG(FATAL) << "should not call the method in column object"; - return nullptr; - } - void update_hash_with_value(size_t n, SipHash& hash) const override; - void insert_data(const char* pos, size_t length) override { - LOG(FATAL) << "should not call the method in column object"; - __builtin_unreachable(); - } - ColumnPtr filter(const Filter&, ssize_t) const override; Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override; @@ -468,8 +436,6 @@ class ColumnObject final : public COWHelper { bool is_variable_length() const override { return true; } - void replace_column_data(const IColumn&, size_t row, size_t self_row) override; - template MutableColumnPtr apply_for_subcolumns(Func&& func) const; @@ -488,6 +454,118 @@ class ColumnObject final : public COWHelper { Status sanitize() const; std::string debug_string() const; + +#define THROW_NOT_IMPLEMENTED_ERROR(method_name) \ + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, \ + std::string(method_name) + std::string(get_family_name())) + + MutableColumnPtr get_shrinked_column() override { + THROW_NOT_IMPLEMENTED_ERROR("get_shrinked_column"); + } + + Int64 get_int(size_t /*n*/) const override { THROW_NOT_IMPLEMENTED_ERROR("get_int"); } + + bool get_bool(size_t /*n*/) const override { THROW_NOT_IMPLEMENTED_ERROR("get_bool"); } + + void insert_many_fix_len_data(const char* pos, size_t num) override { + 
THROW_NOT_IMPLEMENTED_ERROR("insert_many_fix_len_data"); + } + + void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict, + size_t data_num, uint32_t dict_num = 0) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_dict_data"); + } + + void insert_many_binary_data(char* data_array, uint32_t* len_array, + uint32_t* start_offset_array, size_t num) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_binary_data"); + } + + void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets, + const size_t num) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_continuous_binary_data"); + } + + void insert_many_strings(const StringRef* strings, size_t num) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_strings"); + } + + void insert_many_strings_overflow(const StringRef* strings, size_t num, + size_t max_length) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_strings_overflow"); + } + + void insert_many_raw_data(const char* pos, size_t num) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_many_raw_data"); + } + + size_t get_max_row_byte_size() const override { + THROW_NOT_IMPLEMENTED_ERROR("get_max_row_byte_size"); + } + void serialize_vec(std::vector& keys, size_t num_rows, + size_t max_row_byte_size) const override { + THROW_NOT_IMPLEMENTED_ERROR("serialize_vec"); + } + + void serialize_vec_with_null_map(std::vector& keys, size_t num_rows, + const uint8_t* null_map) const override { + THROW_NOT_IMPLEMENTED_ERROR("serialize_vec_with_null_map"); + } + + void deserialize_vec(std::vector& keys, const size_t num_rows) override { + THROW_NOT_IMPLEMENTED_ERROR("deserialize_vec"); + } + + void deserialize_vec_with_null_map(std::vector& keys, const size_t num_rows, + const uint8_t* null_map) override { + THROW_NOT_IMPLEMENTED_ERROR("deserialize_vec_with_null_map"); + } + + void update_hashes_with_value(uint64_t* __restrict hashes, + const uint8_t* __restrict null_data = nullptr) const override { + 
THROW_NOT_IMPLEMENTED_ERROR("update_hashes_with_value"); + } + + void update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, + const uint8_t* __restrict null_data) const override { + THROW_NOT_IMPLEMENTED_ERROR("update_xxHash_with_value"); + } + + void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, + uint32_t offset = 0, + const uint8_t* __restrict null_data = nullptr) const override { + THROW_NOT_IMPLEMENTED_ERROR("update_crcs_with_value"); + } + + void update_crc_with_value(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_data) const override { + THROW_NOT_IMPLEMENTED_ERROR("update_crc_with_value"); + } + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) const { + THROW_NOT_IMPLEMENTED_ERROR("filter_by_selector"); + } + bool structure_equals(const IColumn&) const override { + THROW_NOT_IMPLEMENTED_ERROR("structure_equals"); + } + StringRef get_raw_data() const override { THROW_NOT_IMPLEMENTED_ERROR("get_raw_data"); } + + size_t size_of_value_if_fixed() const override { + THROW_NOT_IMPLEMENTED_ERROR("size_of_value_if_fixed"); + } + StringRef get_data_at(size_t) const override { THROW_NOT_IMPLEMENTED_ERROR("get_data_at"); } + StringRef serialize_value_into_arena(size_t n, Arena& arena, + char const*& begin) const override { + THROW_NOT_IMPLEMENTED_ERROR("serialize_value_into_arena"); + } + const char* deserialize_and_insert_from_arena(const char* pos) override { + THROW_NOT_IMPLEMENTED_ERROR("deserialize_and_insert_from_arena"); + } + void insert_data(const char* pos, size_t length) override { + THROW_NOT_IMPLEMENTED_ERROR("insert_data"); + } + void replace_column_data(const IColumn&, size_t row, size_t self_row) override { + THROW_NOT_IMPLEMENTED_ERROR("replace_column_data"); + } }; } // namespace doris::vectorized From d30008b268ee5459a590add51edce922199343d5 Mon Sep 17 00:00:00 2001 From: eldenmoon <15605149486@163.com> Date: Thu, 11 Jul 2024 13:03:17 
+0800 Subject: [PATCH 2/2] fix and support some hash functions --- be/src/vec/columns/column_object.cpp | 53 ++++++++++-- be/src/vec/columns/column_object.h | 122 +++++++++++++++++---------- 2 files changed, 121 insertions(+), 54 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 0b17ad5c0f8b7a..55b5d171223be0 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1545,23 +1545,58 @@ void ColumnObject::insert_indices_from(const IColumn& src, const uint32_t* indic } } -void ColumnObject::update_hash_with_value(size_t n, SipHash& hash) const { - if (!is_finalized()) { - // finalize has no side effect and can be safely used in const functions - const_cast(this)->finalize(); +// finalize has no side effect and can be safely used in const functions +#define ENSURE_FINALIZED() \ + if (!is_finalized()) { \ + const_cast(this)->finalize(); \ } + +void ColumnObject::update_hash_with_value(size_t n, SipHash& hash) const { + ENSURE_FINALIZED(); for_each_imutable_subcolumn([&](const auto& subcolumn) { if (n >= subcolumn.size()) { - std::stringstream ss; - ss << n << " greater than column size " << subcolumn.size() - << " sub_column_info:" << subcolumn.dump_structure() - << " total lines of this column " << num_rows; - throw doris::Exception(ErrorCode::INTERNAL_ERROR, ss.str()); + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "greater than column size {}, sub_column_info:{}, total lines " + "of this column:{}", + subcolumn.size(), subcolumn.dump_structure(), num_rows); } return subcolumn.update_hash_with_value(n, hash); }); } +void ColumnObject::update_hashes_with_value(uint64_t* __restrict hashes, + const uint8_t* __restrict null_data) const { + ENSURE_FINALIZED(); + for_each_imutable_subcolumn([&](const auto& subcolumn) { + return subcolumn.update_hashes_with_value(hashes, nullptr); + }); +} + +void ColumnObject::update_xxHash_with_value(size_t start, size_t end, uint64_t& 
hash, + const uint8_t* __restrict null_data) const { + ENSURE_FINALIZED(); + for_each_imutable_subcolumn([&](const auto& subcolumn) { + return subcolumn.update_xxHash_with_value(start, end, hash, nullptr); + }); +} + +void ColumnObject::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, + uint32_t rows, uint32_t offset, + const uint8_t* __restrict null_data) const { + ENSURE_FINALIZED(); + for_each_imutable_subcolumn([&](const auto& subcolumn) { + return subcolumn.update_crcs_with_value(hash, type, rows, offset, nullptr); + }); +} + +void ColumnObject::update_crc_with_value(size_t start, size_t end, uint32_t& hash, + const uint8_t* __restrict null_data) const { + ENSURE_FINALIZED(); + for_each_imutable_subcolumn([&](const auto& subcolumn) { + return subcolumn.update_crc_with_value(start, end, hash, nullptr); + }); +} + void ColumnObject::for_each_imutable_subcolumn(ImutableColumnCallback callback) const { for (const auto& entry : subcolumns) { for (auto& part : entry->data.data) { diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 61146c6c0ef6a8..4014f8b185a5bf 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -455,116 +455,148 @@ class ColumnObject final : public COWHelper { std::string debug_string() const; -#define THROW_NOT_IMPLEMENTED_ERROR(method_name) \ - throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, \ - std::string(method_name) + std::string(get_family_name())) + void update_hashes_with_value(uint64_t* __restrict hashes, + const uint8_t* __restrict null_data = nullptr) const override; + + void update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, + const uint8_t* __restrict null_data) const override; + + void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, + uint32_t offset = 0, + const uint8_t* __restrict null_data = nullptr) const override; + + void update_crc_with_value(size_t start, size_t end, 
uint32_t& hash, + const uint8_t* __restrict null_data) const override; + // Not implemented MutableColumnPtr get_shrinked_column() override { - THROW_NOT_IMPLEMENTED_ERROR("get_shrinked_column"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_shrinked_column" + std::string(get_family_name())); } - Int64 get_int(size_t /*n*/) const override { THROW_NOT_IMPLEMENTED_ERROR("get_int"); } + Int64 get_int(size_t /*n*/) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_int" + std::string(get_family_name())); + } - bool get_bool(size_t /*n*/) const override { THROW_NOT_IMPLEMENTED_ERROR("get_bool"); } + bool get_bool(size_t /*n*/) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_bool" + std::string(get_family_name())); + } void insert_many_fix_len_data(const char* pos, size_t num) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_fix_len_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_fix_len_data" + std::string(get_family_name())); } void insert_many_dict_data(const int32_t* data_array, size_t start_index, const StringRef* dict, size_t data_num, uint32_t dict_num = 0) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_dict_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_dict_data" + std::string(get_family_name())); } void insert_many_binary_data(char* data_array, uint32_t* len_array, uint32_t* start_offset_array, size_t num) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_binary_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_binary_data" + std::string(get_family_name())); } void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets, const size_t num) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_continuous_binary_data"); + throw doris::Exception( + ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_continuous_binary_data" + 
std::string(get_family_name())); } void insert_many_strings(const StringRef* strings, size_t num) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_strings"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_strings" + std::string(get_family_name())); } void insert_many_strings_overflow(const StringRef* strings, size_t num, size_t max_length) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_strings_overflow"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_strings_overflow" + std::string(get_family_name())); } void insert_many_raw_data(const char* pos, size_t num) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_many_raw_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_many_raw_data" + std::string(get_family_name())); } size_t get_max_row_byte_size() const override { - THROW_NOT_IMPLEMENTED_ERROR("get_max_row_byte_size"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_max_row_byte_size" + std::string(get_family_name())); } + void serialize_vec(std::vector& keys, size_t num_rows, size_t max_row_byte_size) const override { - THROW_NOT_IMPLEMENTED_ERROR("serialize_vec"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_vec" + std::string(get_family_name())); } void serialize_vec_with_null_map(std::vector& keys, size_t num_rows, const uint8_t* null_map) const override { - THROW_NOT_IMPLEMENTED_ERROR("serialize_vec_with_null_map"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_vec_with_null_map" + std::string(get_family_name())); } void deserialize_vec(std::vector& keys, const size_t num_rows) override { - THROW_NOT_IMPLEMENTED_ERROR("deserialize_vec"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_vec" + std::string(get_family_name())); } void deserialize_vec_with_null_map(std::vector& keys, const size_t num_rows, const uint8_t* null_map) override { - 
THROW_NOT_IMPLEMENTED_ERROR("deserialize_vec_with_null_map"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_vec_with_null_map" + std::string(get_family_name())); } - void update_hashes_with_value(uint64_t* __restrict hashes, - const uint8_t* __restrict null_data = nullptr) const override { - THROW_NOT_IMPLEMENTED_ERROR("update_hashes_with_value"); + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) const { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "filter_by_selector" + std::string(get_family_name())); } - void update_xxHash_with_value(size_t start, size_t end, uint64_t& hash, - const uint8_t* __restrict null_data) const override { - THROW_NOT_IMPLEMENTED_ERROR("update_xxHash_with_value"); + bool structure_equals(const IColumn&) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "structure_equals" + std::string(get_family_name())); } - void update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type, uint32_t rows, - uint32_t offset = 0, - const uint8_t* __restrict null_data = nullptr) const override { - THROW_NOT_IMPLEMENTED_ERROR("update_crcs_with_value"); + StringRef get_raw_data() const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_raw_data" + std::string(get_family_name())); } - void update_crc_with_value(size_t start, size_t end, uint32_t& hash, - const uint8_t* __restrict null_data) const override { - THROW_NOT_IMPLEMENTED_ERROR("update_crc_with_value"); - } - Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) const { - THROW_NOT_IMPLEMENTED_ERROR("filter_by_selector"); - } - bool structure_equals(const IColumn&) const override { - THROW_NOT_IMPLEMENTED_ERROR("structure_equals"); + size_t size_of_value_if_fixed() const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "size_of_value_if_fixed" + std::string(get_family_name())); } - StringRef get_raw_data() const 
override { THROW_NOT_IMPLEMENTED_ERROR("get_raw_data"); } - size_t size_of_value_if_fixed() const override { - THROW_NOT_IMPLEMENTED_ERROR("size_of_value_if_fixed"); + StringRef get_data_at(size_t) const override { + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "get_data_at" + std::string(get_family_name())); } - StringRef get_data_at(size_t) const override { THROW_NOT_IMPLEMENTED_ERROR("get_data_at"); } + StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override { - THROW_NOT_IMPLEMENTED_ERROR("serialize_value_into_arena"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "serialize_value_into_arena" + std::string(get_family_name())); } + const char* deserialize_and_insert_from_arena(const char* pos) override { - THROW_NOT_IMPLEMENTED_ERROR("deserialize_and_insert_from_arena"); + throw doris::Exception( + ErrorCode::NOT_IMPLEMENTED_ERROR, + "deserialize_and_insert_from_arena" + std::string(get_family_name())); } + void insert_data(const char* pos, size_t length) override { - THROW_NOT_IMPLEMENTED_ERROR("insert_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "insert_data" + std::string(get_family_name())); } + void replace_column_data(const IColumn&, size_t row, size_t self_row) override { - THROW_NOT_IMPLEMENTED_ERROR("replace_column_data"); + throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, + "replace_column_data" + std::string(get_family_name())); } };