From c5125b935b405c4a7e6608a8a698a13373636794 Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Mon, 16 Oct 2023 18:56:57 +0800 Subject: [PATCH 1/2] [opt](index compaction) optimize checks before index compaction --- be/src/olap/compaction.cpp | 42 +++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 268c62847b16e0..c5c6f548189b89 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -47,6 +47,7 @@ #include "olap/rowset/rowset_writer.h" #include "olap/rowset/rowset_writer_context.h" #include "olap/rowset/segment_v2/inverted_index_compaction.h" +#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" #include "olap/storage_engine.h" #include "olap/storage_policy.h" #include "olap/tablet.h" @@ -546,6 +547,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool BetaRowsetSharedPtr rowset = std::static_pointer_cast(src_rs); if (rowset == nullptr) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] rowset is null, will skip index compaction"; return false; } auto fs = rowset->rowset_meta()->fs(); @@ -553,6 +556,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool auto index_meta = rowset->tablet_schema()->get_inverted_index(unique_id); if (index_meta == nullptr) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" << unique_id + << "] index meta is null, will skip index compaction"; return false; } for (auto i = 0; i < rowset->num_segments(); i++) { @@ -567,10 +572,45 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool return false; } if (!exists) { - LOG(WARNING) << inverted_index_src_file_path + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" + << unique_id << "]," << inverted_index_src_file_path << " is not exists, will skip index compaction"; return false; } + + // check idx file size + int64_t file_size = 0; + if (fs->file_size(inverted_index_src_file_path, &file_size) != + Status::OK()) { + LOG(ERROR) + << inverted_index_src_file_path << " fs->file_size error"; + return false; + } + if (file_size == 0) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" + << unique_id << "]," << inverted_index_src_file_path + << " is empty file, will skip index compaction"; + return false; + } + + // check index meta + std::filesystem::path p(inverted_index_src_file_path); + std::string dir_str = p.parent_path().string(); + std::string file_str = p.filename().string(); + lucene::store::Directory* dir = + DorisCompoundDirectory::getDirectory(fs, dir_str.c_str()); + auto reader = new DorisCompoundReader(dir, file_str.c_str()); + std::vector files; + reader->list(&files); + + // why is 3? + // bkd index will write at least 3 files + if (files.size() < 3) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" + << unique_id << "]," << inverted_index_src_file_path + << " is corrupted, will skip index compaction"; + return false; + } } return true; }); From d91ce9bd594e619186863eabc9209bec707479ff Mon Sep 17 00:00:00 2001 From: Jianliang Qi Date: Mon, 16 Oct 2023 19:01:49 +0800 Subject: [PATCH 2/2] format code --- be/src/olap/compaction.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index c5c6f548189b89..4855fdad0b9c26 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -556,7 +556,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool auto index_meta = rowset->tablet_schema()->get_inverted_index(unique_id); if (index_meta == nullptr) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" << unique_id + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "] index meta is null, will skip index compaction"; return false; } @@ -572,8 +573,9 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool return false; } if (!exists) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" - << unique_id << "]," << inverted_index_src_file_path + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path << " is not exists, will skip index compaction"; return false; } @@ -582,13 +584,14 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool int64_t file_size = 0; if (fs->file_size(inverted_index_src_file_path, &file_size) != Status::OK()) { - LOG(ERROR) - << inverted_index_src_file_path << " fs->file_size error"; + LOG(ERROR) << inverted_index_src_file_path + << " fs->file_size error"; return false; } if (file_size == 0) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" - << unique_id << "]," << inverted_index_src_file_path + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path << " is empty file, will skip index compaction"; return false; } @@ -606,8 +609,9 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool // why is 3? // bkd index will write at least 3 files if (files.size() < 3) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index_unique_id[" - << unique_id << "]," << inverted_index_src_file_path + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path << " is corrupted, will skip index compaction"; return false; }