From 9a00591de62dd5da14ae8f4e1da090135ccf8cfb Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Sun, 4 Feb 2024 21:58:06 +0800 Subject: [PATCH] [Improve](Tablet Schema) Use deterministic way to serialize protobuf (#101) --- be/src/olap/rowset/rowset_meta.cpp | 3 ++- be/src/olap/tablet_schema.cpp | 17 +++++++++++++++-- be/src/olap/tablet_schema.h | 11 +++++++---- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp index 78bc4b7fe622a1..125089a28626e9 100644 --- a/be/src/olap/rowset/rowset_meta.cpp +++ b/be/src/olap/rowset/rowset_meta.cpp @@ -133,7 +133,8 @@ void RowsetMeta::set_tablet_schema(const TabletSchemaPB& tablet_schema) { if (_handle) { TabletSchemaCache::instance()->release(_handle); } - auto pair = TabletSchemaCache::instance()->insert(tablet_schema.SerializeAsString()); + auto pair = TabletSchemaCache::instance()->insert( + TabletSchema::deterministic_string_serialize(tablet_schema)); _handle = pair.first; _schema = pair.second; } diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index bd54af19603ebe..e02833277ee48b 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #include @@ -27,6 +30,7 @@ #include // IWYU pragma: keep #include #include +#include #include "common/compiler_util.h" // IWYU pragma: keep #include "common/consts.h" @@ -770,7 +774,7 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const { index->add_col_unique_id(col_unique_id); } index->set_index_type(_index_type); - for (auto& kv : _properties) { + for (const auto& kv : _properties) { (*index->mutable_properties())[kv.first] = kv.second; } index->set_index_suffix_name(_escaped_index_suffix_path); @@ -929,7 +933,7 @@ void TabletSchema::copy_from(const TabletSchema& tablet_schema) { std::string TabletSchema::to_key() const { TabletSchemaPB pb; 
to_schema_pb(&pb); - return pb.SerializeAsString(); + return TabletSchema::deterministic_string_serialize(pb); } void TabletSchema::build_current_tablet_schema(int64_t index_id, int32_t version, @@ -1382,4 +1386,13 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) { return !(a == b); } +std::string TabletSchema::deterministic_string_serialize(const TabletSchemaPB& schema_pb) { + std::string output; + google::protobuf::io::StringOutputStream string_output_stream(&output); + google::protobuf::io::CodedOutputStream output_stream(&string_output_stream); + output_stream.SetSerializationDeterministic(true); + schema_pb.SerializeToCodedStream(&output_stream); + return output; +} + } // namespace doris diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index a515095814ffe4..3ca4392584327d 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -240,6 +240,9 @@ class TabletSchema { // void create_from_pb(const TabletSchemaPB& schema, TabletSchema* tablet_schema). TabletSchema() = default; void init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns = false); + // Notice: Use a deterministic way to serialize the protobuf, + // since serializing a Map field in protobuf is not deterministic by default + static std::string deterministic_string_serialize(const TabletSchemaPB& schema_pb); void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const; void append_column(TabletColumn column, ColumnType col_type = ColumnType::NORMAL); void append_index(TabletIndex index); @@ -363,17 +366,17 @@ class TabletSchema { // Dump [(name, type, is_nullable), ...] 
string dump_structure() const { string str = "["; - for (auto p : _field_name_to_index) { + for (auto p : _cols) { if (str.size() > 1) { str += ", "; } str += "("; - str += p.first; + str += p.name(); str += ", "; - str += TabletColumn::get_string_by_field_type(_cols[p.second].type()); + str += TabletColumn::get_string_by_field_type(p.type()); str += ", "; str += "is_nullable:"; - str += (_cols[p.second].is_nullable() ? "true" : "false"); + str += (p.is_nullable() ? "true" : "false"); str += ")"; } str += "]";