From 19de3d6323317d2b52363fa785c9235747ba823d Mon Sep 17 00:00:00 2001 From: yangzhg Date: Wed, 10 Jun 2020 19:50:08 +0800 Subject: [PATCH 1/6] Add segment v2 footer meta viewer --- be/src/tools/meta_tool.cpp | 103 +++++++++++++++++++++++++++++++++++-- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 53d6fdaf14322f..9e4ac246257725 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -34,6 +34,13 @@ #include "json2pb/pb_to_json.h" #include "gutil/strings/split.h" #include "gutil/strings/numbers.h" +#include "gutil/strings/substitute.h" +#include "util/coding.h" +#include "util/crc32c.h" +#include "olap/rowset/segment_v2/column_reader.h" +#include "olap/rowset/segment_v2/binary_plain_page.h" +#include "gen_cpp/segment_v2.pb.h" +#include "env/env.h" using boost::filesystem::path; using doris::DataDir; @@ -44,6 +51,17 @@ using doris::Status; using doris::TabletMeta; using doris::TabletMetaManager; using doris::FileUtils; +using doris::Slice; +using doris::RandomAccessFile; +using strings::Substitute; +using doris::segment_v2::SegmentFooterPB; +using doris::segment_v2::ColumnReader; +using doris::segment_v2::BinaryPlainPageDecoder; +using doris::segment_v2::PageHandle; +using doris::segment_v2::PagePointer; +using doris::segment_v2::ColumnReaderOptions; +using doris::segment_v2::ColumnIteratorOptions; +using doris::segment_v2::PageFooterPB; const std::string HEADER_PREFIX = "tabletmeta_"; @@ -56,6 +74,7 @@ DEFINE_int32(schema_hash, 0, "schema_hash for tablet meta"); DEFINE_string(json_meta_path, "", "absolute json meta file path"); DEFINE_string(pb_meta_path, "", "pb meta file path"); DEFINE_string(tablet_file, "", "file to save a set of tablets"); +DEFINE_string(file, "", "segment file path"); std::string get_usage(const std::string& progname) { std::stringstream ss; @@ -70,7 +89,8 @@ std::string get_usage(const std::string& progname) { "--root_path=/path/to/storage/path 
--tablet_id=tabletid " "--schema_hash=schemahash\n"; ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; - ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; + ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n";\ + ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; return ss.str(); } @@ -235,6 +255,77 @@ void batch_delete_meta(const std::string& tablet_file) { return; } +Status get_segment_footer(RandomAccessFile* input_file, SegmentFooterPB* footer) { + // Footer := SegmentFooterPB, FooterPBSize(4), FooterPBChecksum(4), MagicNumber(4) + std::string file_name = input_file->file_name(); + uint64_t file_size; + RETURN_IF_ERROR(input_file->size(&file_size)); + + if (file_size < 12) { + return Status::Corruption(Substitute("Bad segment file $0: file size $1 < 12", file_name, file_size)); + } + + uint8_t fixed_buf[12]; + RETURN_IF_ERROR(input_file->read_at(file_size - 12, Slice(fixed_buf, 12))); + + // validate magic number + const char* k_segment_magic = "D0R1"; + const uint32_t k_segment_magic_length = 4; + if (memcmp(fixed_buf + 8, k_segment_magic, k_segment_magic_length) != 0) { + return Status::Corruption(Substitute("Bad segment file $0: magic number not match", file_name)); + } + + // read footer PB; compare against file_size - 12 (file_size >= 12 was checked above) because 12 + footer_length is 32-bit arithmetic and wraps for a corrupt, huge footer_length + uint32_t footer_length = doris::decode_fixed32_le(fixed_buf); + if (file_size - 12 < footer_length) { + return Status::Corruption( + Substitute("Bad segment file $0: file size $1 < $2", file_name, file_size, 12 + static_cast<uint64_t>(footer_length))); + } + std::string footer_buf; + footer_buf.resize(footer_length); + RETURN_IF_ERROR(input_file->read_at(file_size - 12 - footer_length, footer_buf)); + + // validate footer PB's checksum + uint32_t expect_checksum = doris::decode_fixed32_le(fixed_buf + 4); + uint32_t actual_checksum = doris::crc32c::Value(footer_buf.data(), footer_buf.size()); + if (actual_checksum != expect_checksum) { + return Status::Corruption( + Substitute("Bad segment file $0: footer 
checksum not match, actual=$1 vs expect=$2", + file_name, actual_checksum, expect_checksum)); + } + + // deserialize footer PB + if (!footer->ParseFromString(footer_buf)) { + return Status::Corruption(Substitute("Bad segment file $0: failed to parse SegmentFooterPB", file_name)); + } + return Status::OK(); +} + +void show_segment_footer(const std::string& file_name) { + std::unique_ptr<RandomAccessFile> input_file; + Status status = doris::Env::Default()->new_random_access_file(file_name, &input_file); + if (!status.ok()) { + std::cout << "open file failed: " << status.to_string() << std::endl; + return; + } + SegmentFooterPB footer; + status = get_segment_footer(input_file.get(), &footer); + if (!status.ok()) { + std::cout << "get footer failed: " << status.to_string() << std::endl; + return; + } + std::string json_footer; + json2pb::Pb2JsonOptions json_options; + json_options.pretty_json = true; + bool ret = json2pb::ProtoMessageToJson(footer, &json_footer, json_options); + if (!ret) { + std::cout << "Convert PB to json failed" << std::endl; + return; + } + std::cout << json_footer << std::endl; + return; +} + int main(int argc, char** argv) { std::string usage = get_usage(argv[0]); gflags::SetUsageMessage(usage); @@ -252,6 +343,12 @@ int main(int argc, char** argv) { } batch_delete_meta(tablet_file); + } else if (FLAGS_operation == "show_segment_footer") { + if (FLAGS_file == "") { + std::cout << "no file flag for show_segment_footer" << std::endl; + return -1; + } + show_segment_footer(FLAGS_file); } else { // operations that need root path should be written here std::set valid_operations = {"get_meta", "load_meta", @@ -276,11 +373,11 @@ int main(int argc, char** argv) { } else if (FLAGS_operation == "delete_meta") { delete_meta(data_dir.get()); } else { - std::cout << "invalid operation:" << FLAGS_operation << "\n" + std::cout << "invalid operation: " << FLAGS_operation << "\n" << usage << std::endl; return -1; } } gflags::ShutDownCommandLineFlags(); return 0; -} +} \ No newline at end 
of file From dd438ba0d310bed9a29526319013dbd1b25b6335 Mon Sep 17 00:00:00 2001 From: yangzhg Date: Wed, 10 Jun 2020 19:54:48 +0800 Subject: [PATCH 2/6] Add segment v2 footer meta viewer --- be/src/tools/meta_tool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 9e4ac246257725..35478f0696fb88 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -89,7 +89,7 @@ std::string get_usage(const std::string& progname) { "--root_path=/path/to/storage/path --tablet_id=tabletid " "--schema_hash=schemahash\n"; ss << "./meta_tool --operation=delete_meta --tablet_file=file_path\n"; - ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n";\ + ss << "./meta_tool --operation=show_meta --pb_meta_path=path\n"; ss << "./meta_tool --operation=show_segment_footer --file=/path/to/segment/file\n"; return ss.str(); } From ef6565974b2507c8f526a4be7db768061793f07d Mon Sep 17 00:00:00 2001 From: yangzhg Date: Wed, 10 Jun 2020 19:55:15 +0800 Subject: [PATCH 3/6] Add segment v2 footer meta viewer --- be/src/tools/meta_tool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index 35478f0696fb88..e63393bad59dc2 100644 --- a/be/src/tools/meta_tool.cpp +++ b/be/src/tools/meta_tool.cpp @@ -380,4 +380,4 @@ int main(int argc, char** argv) { } gflags::ShutDownCommandLineFlags(); return 0; -} \ No newline at end of file +} From 6854027696401e0c51ab5985bfc85cd0a9fd7b6b Mon Sep 17 00:00:00 2001 From: yangzhg Date: Thu, 11 Jun 2020 10:08:00 +0800 Subject: [PATCH 4/6] add docs --- be/src/tools/meta_tool.cpp | 25 ++++++++++--------- .../operation/tablet-meta-tool.md | 16 +++++++++--- .../operation/tablet-meta-tool.md | 9 +++++++ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/be/src/tools/meta_tool.cpp b/be/src/tools/meta_tool.cpp index e63393bad59dc2..916dd92875abae 100644 --- a/be/src/tools/meta_tool.cpp 
+++ b/be/src/tools/meta_tool.cpp @@ -15,32 +15,33 @@ // specific language governing permissions and limitations // under the License. -#include #include #include #include +#include + #include #include #include "common/status.h" -#include "util/file_utils.h" +#include "env/env.h" #include "gen_cpp/olap_file.pb.h" -#include "olap/options.h" +#include "gen_cpp/segment_v2.pb.h" +#include "gutil/strings/numbers.h" +#include "gutil/strings/split.h" +#include "gutil/strings/substitute.h" +#include "json2pb/pb_to_json.h" #include "olap/data_dir.h" -#include "olap/tablet_meta_manager.h" #include "olap/olap_define.h" +#include "olap/options.h" +#include "olap/rowset/segment_v2/binary_plain_page.h" +#include "olap/rowset/segment_v2/column_reader.h" +#include "olap/tablet_meta_manager.h" #include "olap/tablet_meta.h" #include "olap/utils.h" -#include "json2pb/pb_to_json.h" -#include "gutil/strings/split.h" -#include "gutil/strings/numbers.h" -#include "gutil/strings/substitute.h" #include "util/coding.h" #include "util/crc32c.h" -#include "olap/rowset/segment_v2/column_reader.h" -#include "olap/rowset/segment_v2/binary_plain_page.h" -#include "gen_cpp/segment_v2.pb.h" -#include "env/env.h" +#include "util/file_utils.h" using boost::filesystem::path; using doris::DataDir; diff --git a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md index 7cc309fc7b4b00..7f389d45a35fbb 100644 --- a/docs/en/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/en/administrator-guide/operation/tablet-meta-tool.md @@ -71,7 +71,7 @@ If the final query is successful, the Tablet Meta will be returned as json. Get Tablet Meta on a disk based on the meta\ tool tool. -Order: +Command: ``` ./lib/meta_tool --root_path=/path/to/root_path --operation=get_meta --tablet_id=xxx --schema_hash=xxx @@ -85,7 +85,7 @@ The result is also a presentation of Tablet Meta in JSON format. 
The function of loading header is provided to realize manual migration of tablet. This function is based on Tablet Meta in JSON format, so if changes in the shard field and version information are involved, they can be changed directly in the JSON content of Tablet Meta. Then use the following commands to load. -Order: +Command: ``` ./lib/meta_tool --operation=load_meta --root_path=/path/to/root_path --json_header_path=path @@ -130,8 +130,18 @@ Batch delete will skip the line with incorrect tablet information format in `tab This command is to view the old file-based management PB format Tablet Meta, and to display Tablet Meta in JSON format. -Order: +Command: ``` ./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` + +### Segment Footer in Pb format + +This command is to view the SegmentV2 PB format SegmentFooterPB, and to display SegmentFooterPB in JSON format. + +Command: + +``` +./meta_tool --operation=show_segment_footer --file=/path/to/segment/file +``` diff --git a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md index b44588f0ffd48e..872980ec6c3c7c 100644 --- a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md @@ -136,4 +136,13 @@ api: ./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` +### 展示 pb 格式的 Segment Footer + +这个命令是为了查看SegmentV2 的SegmentFooterPB信息,以json 形式展示出来 + +命令: + +``` +./meta_tool --operation=show_segment_footer --file=/path/to/segment/file + From cae1c35423729a38c68d41581f81b426aada4f41 Mon Sep 17 00:00:00 2001 From: yangzhg Date: Tue, 16 Jun 2020 14:24:55 +0800 Subject: [PATCH 5/6] xxx --- docs/en/administrator-guide/operation/tablet-meta-tool.md | 6 +++--- .../zh-CN/administrator-guide/operation/tablet-meta-tool.md | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md index 7f389d45a35fbb..ce89f8e2b02010 100644 --- a/docs/en/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/en/administrator-guide/operation/tablet-meta-tool.md @@ -133,12 +133,12 @@ This command is to view the old file-based management PB format Tablet Meta, and Command: ``` -./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path +./lib/meta_tool --operation=how_meta --root_path=/path/to/root_path --pb_header_path=path ``` -### Segment Footer in Pb format +### Segment meta in Pb format -This command is to view the SegmentV2 PB format SegmentFooterPB, and to display SegmentFooterPB in JSON format. +This command is to view the PB format segment meta, and to display segment meta in JSON format. Command: diff --git a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md index 872980ec6c3c7c..386b5bb5330e49 100644 --- a/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/zh-CN/administrator-guide/operation/tablet-meta-tool.md @@ -136,9 +136,9 @@ api: ./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` -### 展示 pb 格式的 Segment Footer +### 展示 pb 格式的 Segment meta -这个命令是为了查看SegmentV2 的SegmentFooterPB信息,以json 形式展示出来 +这个命令是为了查看SegmentV2 的segment meta信息,以json 形式展示出来 命令: From bd98a8a23f8ed2ec9a0696dda4cab0b91e061701 Mon Sep 17 00:00:00 2001 From: yangzhg <780531911@qq.com> Date: Thu, 18 Jun 2020 14:41:49 +0800 Subject: [PATCH 6/6] Update docs/en/administrator-guide/operation/tablet-meta-tool.md Co-authored-by: Mingyu Chen --- docs/en/administrator-guide/operation/tablet-meta-tool.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/administrator-guide/operation/tablet-meta-tool.md b/docs/en/administrator-guide/operation/tablet-meta-tool.md index 
ce89f8e2b02010..680519a13ca5d4 100644 --- a/docs/en/administrator-guide/operation/tablet-meta-tool.md +++ b/docs/en/administrator-guide/operation/tablet-meta-tool.md @@ -133,7 +133,7 @@ This command is to view the old file-based management PB format Tablet Meta, and Command: ``` -./lib/meta_tool --operation=how_meta --root_path=/path/to/root_path --pb_header_path=path +./lib/meta_tool --operation=show_meta --root_path=/path/to/root_path --pb_header_path=path ``` ### Segment meta in Pb format