diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp index 36eb4e325ee866..a400e797e03f77 100644 --- a/cloud/src/recycler/checker.cpp +++ b/cloud/src/recycler/checker.cpp @@ -39,6 +39,7 @@ #include "common/bvars.h" #include "common/config.h" +#include "common/defer.h" #include "common/encryption_util.h" #include "common/logging.h" #include "common/util.h" @@ -497,12 +498,24 @@ int InstanceChecker::do_check() { }; TabletFiles tablet_files_cache; - auto check_rowset_objects = [&, this](const doris::RowsetMetaCloudPB& rs_meta, - std::string_view key) { + auto check_rowset_objects = [&, this](doris::RowsetMetaCloudPB& rs_meta, std::string_view key) { if (rs_meta.num_segments() == 0) { return; } + bool data_loss = false; + bool segment_file_loss = false; + bool index_file_loss = false; + + DORIS_CLOUD_DEFER { + if (data_loss) { + LOG(INFO) << "segment file is" << (segment_file_loss ? "" : " not") << " loss, " + << "index file is" << (index_file_loss ? "" : " not") << " loss, " + << "rowset.tablet_id = " << rs_meta.tablet_id(); + num_rowset_loss++; + } + }; + ++num_scanned_with_segment; if (tablet_files_cache.tablet_id != rs_meta.tablet_id()) { long tablet_volume = 0; @@ -536,7 +549,6 @@ int InstanceChecker::do_check() { instance_volume += tablet_volume; } - bool data_loss = false; for (int i = 0; i < rs_meta.num_segments(); ++i) { auto path = segment_path(rs_meta.tablet_id(), rs_meta.rowset_id_v2(), i); @@ -549,11 +561,36 @@ int InstanceChecker::do_check() { break; } data_loss = true; + segment_file_loss = true; TEST_SYNC_POINT_CALLBACK("InstanceChecker.do_check1", &path); LOG(WARNING) << "object not exist, path=" << path << ", rs_meta=" << rs_meta.ShortDebugString() << " key=" << hex(key); } + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to init txn, err=" << err; + return; + } + + TabletIndexPB tablet_index; + if (get_tablet_idx(txn_kv_.get(), instance_id_, rs_meta.tablet_id(), tablet_index) == -1) { + LOG(WARNING) << "failedt to get tablet index, tablet_id= " << rs_meta.tablet_id(); + return; + } + + auto tablet_schema_key = + meta_schema_key({instance_id_, tablet_index.index_id(), rs_meta.schema_version()}); + std::string tablet_schema_val; + err = txn->get(tablet_schema_key, &tablet_schema_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + // rowset don't have tablet schema key means no index + return; + } + auto* schema = rs_meta.mutable_tablet_schema(); + schema->ParseFromString(tablet_schema_val); + std::vector> index_ids; for (const auto& i : rs_meta.tablet_schema().index()) { if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { @@ -564,7 +601,6 @@ int InstanceChecker::do_check() { if (!key_exist(txn_kv_.get(), tablet_idx_key)) { for (int i = 0; i < rs_meta.num_segments(); ++i) { std::vector index_path_v; - std::vector loss_file_path; if (rs_meta.tablet_schema().inverted_index_storage_format() == InvertedIndexStorageFormatPB::V1) { for (const auto& index_id : index_ids) { @@ -582,32 +618,17 @@ int InstanceChecker::do_check() { } if (!index_path_v.empty()) { - if (std::all_of(index_path_v.begin(), index_path_v.end(), - [&](const auto& idx_file_path) { - if (!tablet_files_cache.files.contains(idx_file_path)) { - loss_file_path.emplace_back(idx_file_path); - return false; - } - return true; - })) { + if (std::ranges::all_of(index_path_v, [&](const auto& idx_file_path) { + return tablet_files_cache.files.contains(idx_file_path); + })) { continue; } } - + index_file_loss = true; data_loss = true; - LOG(WARNING) << "object not exist, path=" - << std::accumulate(loss_file_path.begin(), loss_file_path.end(), - std::string(), - [](const auto& a, const auto& b) { - return a.empty() ? b : a + ", " + b; - }) - << " key=" << hex(tablet_idx_key); + LOG(WARNING) << "object not exist, key=" << hex(tablet_idx_key); } } - - if (data_loss) { - ++num_rowset_loss; - } }; // scan visible rowsets @@ -632,7 +653,9 @@ int InstanceChecker::do_check() { while (it->has_next() && !stopped()) { auto [k, v] = it->next(); - if (!it->has_next()) start_key = k; + if (!it->has_next()) { + start_key = k; + } doris::RowsetMetaCloudPB rs_meta; if (!rs_meta.ParseFromArray(v.data(), v.size())) { @@ -703,11 +726,8 @@ int InstanceChecker::do_inverted_check() { }; TabletRowsets tablet_rowsets_cache; - struct TabletIndexes { - int64_t tablet_id {0}; - std::unordered_set index_ids; - }; - TabletIndexes tablet_indexes_cache; + RowsetIndexesFormatV1 rowset_index_cache_v1; + RowsetIndexesFormatV2 rowset_index_cache_v2; // Return 0 if check success, return 1 if file is garbage data, negative if error occurred auto check_segment_file = [&](const std::string& obj_key) { @@ -786,10 +806,12 @@ int InstanceChecker::do_inverted_check() { return 0; }; + auto check_inverted_index_file = [&](const std::string& obj_key) { std::vector str; butil::SplitString(obj_key, '/', &str); - // data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx + // format v1: data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx + // format v2: data/{tablet_id}/{rowset_id}_{seg_num}.idx if (str.size() < 3) { return -1; } @@ -800,62 +822,31 @@ int InstanceChecker::do_inverted_check() { return -1; } - if (!str.back().ends_with(".idx")) { + // v1: {rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx + // v2: {rowset_id}_{seg_num}.idx + std::string rowset_info = str.back(); + + if (!rowset_info.ends_with(".idx")) { return 0; // Not an index file } - int64_t index_id; + InvertedIndexStorageFormatPB inverted_index_storage_format = + std::count(rowset_info.begin(), rowset_info.end(), '_') > 1 + ? InvertedIndexStorageFormatPB::V1 + : InvertedIndexStorageFormatPB::V2; - size_t pos = str.back().find_last_of('_'); + size_t pos = rowset_info.find_last_of('_'); if (pos == std::string::npos || pos + 1 >= str.back().size() - 4) { LOG(WARNING) << "Invalid index_id format, key=" << obj_key; return -1; } - index_id = atol(str.back().substr(pos + 1, str.back().size() - 4).c_str()); - - if (tablet_indexes_cache.tablet_id == tablet_id) { - if (tablet_indexes_cache.index_ids.contains(index_id)) { - return 0; - } else { - LOG(WARNING) << "index not exists, key=" << obj_key; - return -1; - } - } - // Get all index id of this tablet - tablet_indexes_cache.tablet_id = tablet_id; - tablet_indexes_cache.index_ids.clear(); - std::unique_ptr txn; - TxnErrorCode err = txn_kv_->create_txn(&txn); - if (err != TxnErrorCode::TXN_OK) { - LOG(WARNING) << "failed to create txn"; - return -1; - } - auto tablet_idx_key = meta_tablet_idx_key({instance_id_, tablet_id}); - std::string tablet_idx_val; - err = txn->get(tablet_idx_key, &tablet_idx_val); - if (err != TxnErrorCode::TXN_OK) { - LOG(WARNING) << "failed to get tablet idx," - << " key=" << hex(tablet_idx_key) << " err=" << err; - return -1; - } - - TabletIndexPB tablet_idx_pb; - if (!tablet_idx_pb.ParseFromArray(tablet_idx_val.data(), tablet_idx_val.size())) { - LOG(WARNING) << "malformed index meta value, key=" << hex(tablet_idx_key); - return -1; - } - if (!tablet_idx_pb.has_index_id()) { - LOG(WARNING) << "tablet index meta does not have index_id, key=" << hex(tablet_idx_key); - return -1; - } - tablet_indexes_cache.index_ids.insert(tablet_idx_pb.index_id()); - - if (!tablet_indexes_cache.index_ids.contains(index_id)) { - LOG(WARNING) << "index should be recycled, key=" << obj_key; - return 1; + if (inverted_index_storage_format == InvertedIndexStorageFormatPB::V1) { + return check_inverted_index_file_storage_format_v1(tablet_id, obj_key, rowset_info, + rowset_index_cache_v1); + } else { + return check_inverted_index_file_storage_format_v2(tablet_id, obj_key, rowset_info, + rowset_index_cache_v2); } - - return 0; }; // so we choose to skip here. TEST_SYNC_POINT_RETURN_WITH_VALUE("InstanceChecker::do_inverted_check", (int)0); @@ -1199,6 +1190,202 @@ int InstanceChecker::do_delete_bitmap_inverted_check() { return (leaked_delete_bitmaps > 0 || abnormal_delete_bitmaps > 0) ? 1 : 0; } +int InstanceChecker::check_inverted_index_file_storage_format_v1( + int64_t tablet_id, const std::string& file_path, const std::string& rowset_info, + RowsetIndexesFormatV1& rowset_index_cache_v1) { + // format v1: data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx + std::string rowset_id; + int64_t segment_id; + std::string index_id_with_suffix_name; + // {rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx + std::vector str; + butil::SplitString(rowset_info.substr(0, rowset_info.size() - 4), '_', &str); + if (str.size() < 3) { + LOG(WARNING) << "Split rowset info with '_' error, str size < 3, rowset_info = " + << rowset_info; + return -1; + } + rowset_id = str[0]; + segment_id = std::atoll(str[1].c_str()); + index_id_with_suffix_name = str[2]; + + if (rowset_index_cache_v1.rowset_id == rowset_id) { + if (rowset_index_cache_v1.segment_ids.contains(segment_id)) { + if (auto it = rowset_index_cache_v1.index_ids.find(index_id_with_suffix_name); + it == rowset_index_cache_v1.index_ids.end()) { + // clang-format off + LOG(WARNING) << fmt::format("index_id with suffix name not found, rowset_info = {}, obj_key = {}", rowset_info, file_path); + // clang-format on + return -1; + } + } else { + // clang-format off + LOG(WARNING) << fmt::format("segment id not found, rowset_info = {}, obj_key = {}", rowset_info, file_path); + // clang-format on + return -1; + } + } + + rowset_index_cache_v1.rowset_id = rowset_id; + rowset_index_cache_v1.segment_ids.clear(); + rowset_index_cache_v1.index_ids.clear(); + + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to create txn"; + return -1; + } + std::unique_ptr it; + auto begin = meta_rowset_key({instance_id_, tablet_id, 0}); + auto end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX}); + do { + TxnErrorCode err = txn->get(begin, end, &it); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to get rowset kv, err=" << err; + return -1; + } + if (!it->has_next()) { + break; + } + while (it->has_next()) { + // recycle corresponding resources + auto [k, v] = it->next(); + doris::RowsetMetaCloudPB rs_meta; + if (!rs_meta.ParseFromArray(v.data(), v.size())) { + LOG(WARNING) << "malformed rowset meta value, key=" << hex(k); + return -1; + } + + for (size_t i = 0; i < rs_meta.num_segments(); i++) { + rowset_index_cache_v1.segment_ids.insert(i); + } + + TabletIndexPB tablet_index; + if (get_tablet_idx(txn_kv_.get(), instance_id_, rs_meta.tablet_id(), tablet_index) == + -1) { + LOG(WARNING) << "failedt to get tablet index, tablet_id= " << rs_meta.tablet_id(); + return -1; + } + + auto tablet_schema_key = meta_schema_key( + {instance_id_, tablet_index.index_id(), rs_meta.schema_version()}); + std::string tablet_schema_val; + err = txn->get(tablet_schema_key, &tablet_schema_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + // rowset don't have tablet schema key means no index + return 0; + } + auto* schema = rs_meta.mutable_tablet_schema(); + schema->ParseFromString(tablet_schema_val); + + for (const auto& i : rs_meta.tablet_schema().index()) { + if (i.has_index_type() && i.index_type() == IndexType::INVERTED) { + rowset_index_cache_v1.index_ids.insert( + fmt::format("{}{}", i.index_name(), i.index_suffix_name())); + } + } + + if (!it->has_next()) { + begin = k; + begin.push_back('\x00'); // Update to next smallest key for iteration + break; + } + } + } while (it->more() && !stopped()); + + if (!rowset_index_cache_v1.segment_ids.contains(segment_id)) { + // Garbage data leak + LOG(WARNING) << "rowset should be recycled, key=" << file_path; + return 1; + } + + if (!rowset_index_cache_v1.index_ids.contains(index_id_with_suffix_name)) { + // Garbage data leak + LOG(WARNING) << "rowset with inde meta should be recycled, key=" << file_path; + return 1; + } + + return 0; +} + +int InstanceChecker::check_inverted_index_file_storage_format_v2( + int64_t tablet_id, const std::string& file_path, const std::string& rowset_info, + RowsetIndexesFormatV2& rowset_index_cache_v2) { + std::string rowset_id; + int64_t segment_id; + // {rowset_id}_{seg_num}.idx + std::vector str; + butil::SplitString(rowset_info.substr(0, rowset_info.size() - 4), '_', &str); + if (str.size() < 2) { + // clang-format off + LOG(WARNING) << "Split rowset info with '_' error, str size < 2, rowset_info = " << rowset_info; + // clang-format on + return -1; + } + rowset_id = str[0]; + segment_id = std::atoll(str[1].c_str()); + + if (rowset_index_cache_v2.rowset_id == rowset_id) { + if (!rowset_index_cache_v2.segment_ids.contains(segment_id)) { + // clang-format off + LOG(WARNING) << fmt::format("index file not found, rowset_info = {}, obj_key = {}", rowset_info, file_path); + // clang-format on + return -1; + } + } + + rowset_index_cache_v2.rowset_id = rowset_id; + rowset_index_cache_v2.segment_ids.clear(); + + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to create txn"; + return -1; + } + std::unique_ptr it; + auto begin = meta_rowset_key({instance_id_, tablet_id, 0}); + auto end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX}); + do { + TxnErrorCode err = txn->get(begin, end, &it); + if (err != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to get rowset kv, err=" << err; + return -1; + } + if (!it->has_next()) { + break; + } + while (it->has_next()) { + // recycle corresponding resources + auto [k, v] = it->next(); + doris::RowsetMetaCloudPB rs_meta; + if (!rs_meta.ParseFromArray(v.data(), v.size())) { + LOG(WARNING) << "malformed rowset meta value, key=" << hex(k); + return -1; + } + + for (size_t i = 0; i < rs_meta.num_segments(); i++) { + rowset_index_cache_v2.segment_ids.insert(i); + } + + if (!it->has_next()) { + begin = k; + begin.push_back('\x00'); // Update to next smallest key for iteration + break; + } + } + } while (it->more() && !stopped()); + + if (!rowset_index_cache_v2.segment_ids.contains(segment_id)) { + // Garbage data leak + LOG(WARNING) << "rowset with index meta should be recycled, key=" << file_path; + return 1; + } + + return 0; +} + int InstanceChecker::check_delete_bitmap_storage_optimize(int64_t tablet_id) { using Version = std::pair; struct RowsetDigest { diff --git a/cloud/src/recycler/checker.h b/cloud/src/recycler/checker.h index 7f87e90f7cb366..37cca76c85add3 100644 --- a/cloud/src/recycler/checker.h +++ b/cloud/src/recycler/checker.h @@ -110,6 +110,18 @@ class InstanceChecker { void stop() { stopped_.store(true, std::memory_order_release); } bool stopped() const { return stopped_.load(std::memory_order_acquire); } +private: + struct RowsetIndexesFormatV1 { + std::string rowset_id; + std::unordered_set segment_ids; + std::unordered_set index_ids; + }; + + struct RowsetIndexesFormatV2 { + std::string rowset_id; + std::unordered_set segment_ids; + }; + private: // returns 0 for success otherwise error int init_obj_store_accessors(const InstanceInfoPB& instance); @@ -128,6 +140,14 @@ class InstanceChecker { int check_delete_bitmap_storage_optimize(int64_t tablet_id); + int check_inverted_index_file_storage_format_v1(int64_t tablet_id, const std::string& file_path, + const std::string& rowset_info, + RowsetIndexesFormatV1& rowset_index_cache_v1); + + int check_inverted_index_file_storage_format_v2(int64_t tablet_id, const std::string& file_path, + const std::string& rowset_info, + RowsetIndexesFormatV2& rowset_index_cache_v2); + std::atomic_bool stopped_ {false}; std::shared_ptr txn_kv_; std::string instance_id_; diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index bc920c78d9a6bb..57a075eb5b6d27 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -21,10 +21,13 @@ #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -56,7 +59,95 @@ static int64_t current_time = 0; static constexpr int64_t db_id = 1000; static RecyclerMetricsContext ctx; -static doris::cloud::RecyclerThreadPoolGroup thread_group; +std::vector index_v2_file_path = { + "data/1753202639945/0200000000001a5c92f4e7d9j8f2b4c8a3e6f8b1c9d2e5f8_0.idx", + "data/1753202639947/0200000000001b8d45a74r6c7sf3e9c2b6d4a8e1f7c3d9e2_0.idx", + "data/1753202639951/0200000000001c9e56b8g4f0x8s7g2f0d3c7e5b9f2e8d4f0_0.idx", + "data/1753202639953/0200000000001d0f67c9h5g8a3e6f8b1e4d8f6c0g3f9e5g1_0.idx", + "data/1753202639955/0200000000001e1g78d067c9h5g8i6h2f5e9g7d1h4g0f6h2_0.idx", + "data/1753202639957/0200000000001f2h89e1jg7d1h4g07i3g6f0h8e2i5h1g7i3_0.idx", + "data/1753202639959/020000000000208i90f2k0h8e2i5h8j4h7g1i9f3j6i2h8j4_0.idx", + "data/1753202639961/02000000000021aj01g3l9k5i8h2j8e2i5h8j0g4k7j3i9k5_0.idx", + "data/1753202639963/02000000000022bk12h4m0lk0h8e2i56j9i3k1h5l8k4j0l6_0.idx", + "data/1753202639965/02000000000023cl23i5n1m7g3l9k5i8k0j4l2i6m9l5k1m7_0.idx", + "data/1753202639967/02000000000024dm34j1m7g3l9k6o2n8l1k5m3j7n0m6l2n8_0.idx", + "data/1753202639969/02000000000025en45k7p3o9m2l6n4k34j1m7g38o1n7m3o9_0.idx", + "data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.idx", + "data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.idx", + "data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.idx", + "data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.idx", + "data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.idx", + "data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.idx", + "data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.idx", + "data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.idx"}; + +std::vector segment_v2_file_path = { + "data/1753202639945/0200000000001a5c92f4e7d9j8f2b4c8a3e6f8b1c9d2e5f8_0.dat", + "data/1753202639947/0200000000001b8d45a74r6c7sf3e9c2b6d4a8e1f7c3d9e2_0.dat", + "data/1753202639951/0200000000001c9e56b8g4f0x8s7g2f0d3c7e5b9f2e8d4f0_0.dat", + "data/1753202639953/0200000000001d0f67c9h5g8a3e6f8b1e4d8f6c0g3f9e5g1_0.dat", + "data/1753202639955/0200000000001e1g78d067c9h5g8i6h2f5e9g7d1h4g0f6h2_0.dat", + "data/1753202639957/0200000000001f2h89e1jg7d1h4g07i3g6f0h8e2i5h1g7i3_0.dat", + "data/1753202639959/020000000000208i90f2k0h8e2i5h8j4h7g1i9f3j6i2h8j4_0.dat", + "data/1753202639961/02000000000021aj01g3l9k5i8h2j8e2i5h8j0g4k7j3i9k5_0.dat", + "data/1753202639963/02000000000022bk12h4m0lk0h8e2i56j9i3k1h5l8k4j0l6_0.dat", + "data/1753202639965/02000000000023cl23i5n1m7g3l9k5i8k0j4l2i6m9l5k1m7_0.dat", + "data/1753202639967/02000000000024dm34j1m7g3l9k6o2n8l1k5m3j7n0m6l2n8_0.dat", + "data/1753202639969/02000000000025en45k7p3o9m2l6n4k34j1m7g38o1n7m3o9_0.dat", + "data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.dat", + "data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.dat", + "data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.dat", + "data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.dat", + "data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.dat", + "data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.dat", + "data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.dat", + "data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.dat"}; + +// clang-format off +std::vector index_v1_file_path = { + "data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0_1753202846943.idx", + "data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0_1753202841593.idx", + "data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0_1753202846943.idx", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx", + "data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202844931.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202846410.idx", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202847011.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202844931.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202846410.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202847011.idx", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202858543.idx", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202844931.idx", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202846410.idx", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202847011.idx", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202858543.idx", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202844931.idx", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202846410.idx", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202847011.idx"}; +// clang-format on + +std::vector segment_v1_file_path = { + "data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0.dat", + "data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat", + "data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat"}; + +doris::cloud::RecyclerThreadPoolGroup thread_group; int main(int argc, char** argv) { auto conf_file = "doris_cloud.conf"; @@ -240,6 +331,311 @@ static int create_tmp_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor, return 0; } +static int create_committed_rowset_with_tablet_schema( + TxnKv* txn_kv, StorageVaultAccessor* accessor, const std::string& resource_id, + int64_t tablet_id, int64_t version, int num_segments = 1, size_t num_inverted_indexes = 1, + bool use_inverted_index_storage_format_v1 = true) { + std::string val; + std::unique_ptr txn; + int64_t tablet_index_id = 123; + int64_t schema_version = 456; + + auto rowset_id = next_rowset_id(); + MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; + std::string rowset_meta_key = meta_rowset_key(key_info); + + doris::RowsetMetaCloudPB rowset_pb; + rowset_pb.set_rowset_id(0); // useless but required + rowset_pb.set_rowset_id_v2(rowset_id); + rowset_pb.set_num_segments(num_segments); + rowset_pb.set_tablet_id(tablet_id); + rowset_pb.set_resource_id(resource_id); + rowset_pb.set_creation_time(current_time); + rowset_pb.set_schema_version(schema_version); + rowset_pb.SerializeToString(&val); + + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(rowset_meta_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + TabletIndexPB tablet_index; + tablet_index.set_index_id(tablet_index_id); + tablet_index.set_tablet_id(tablet_id); + std::string tablet_index_key = meta_tablet_idx_key({instance_id, tablet_id}); + tablet_index.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_index_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + if (num_inverted_indexes) { + doris::TabletSchemaCloudPB tablet_schema; + if (use_inverted_index_storage_format_v1) { + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); + } else { + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + } + tablet_schema.set_schema_version(schema_version); + for (size_t i = 0; i < num_inverted_indexes; i++) { + auto index = tablet_schema.add_index(); + index->set_index_id(i); + index->set_index_type(IndexType::INVERTED); + } + std::string tablet_schema_key = + meta_schema_key({instance_id, tablet_index_id, schema_version}); + std::string val; + tablet_schema.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_schema_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + } + + for (int i = 0; i < num_segments; ++i) { + auto path = segment_path(tablet_id, rowset_id, i); + accessor->put_file(path, ""); + if (use_inverted_index_storage_format_v1) { + for (int j = 0; j < num_inverted_indexes; ++j) { + std::string path = inverted_index_path_v1(tablet_id, rowset_id, i, j, ""); + accessor->put_file(path, ""); + } + } else { + std::string path = inverted_index_path_v2(tablet_id, rowset_id, i); + accessor->put_file(path, ""); + } + } + return 0; +} + +static int create_committed_rowset_by_real_index_v2_file(TxnKv* txn_kv, + StorageVaultAccessor* accessor, + const std::string& resource_id, + const std::string& file_path, + int64_t version = 1) { + std::string val; + std::unique_ptr txn; + + // Parse file path to extract tablet_id and rowset_id + // Expected format: data/{tablet_id}/{rowset_id}_{segment_id}.{ext} + std::vector path_parts; + butil::SplitString(file_path, '/', &path_parts); + + if (path_parts.size() < 3 || path_parts[0] != "data") { + LOG(WARNING) << "Invalid file path format: " << file_path; + return -1; + } + + int64_t tablet_id = std::stoll(path_parts[1]); + std::string filename = path_parts[2]; + + // Extract rowset_id and segment_id from filename + size_t underscore_pos = filename.find_last_of('_'); + size_t dot_pos = filename.find_last_of('.'); + + if (underscore_pos == std::string::npos || dot_pos == std::string::npos || + underscore_pos >= dot_pos) { + LOG(WARNING) << "Invalid filename format: " << filename; + return -1; + } + + std::string rowset_id = filename.substr(0, underscore_pos); + std::string segment_str = filename.substr(underscore_pos + 1, dot_pos - underscore_pos - 1); + std::string extension = filename.substr(dot_pos + 1); + + int segment_id = stoll(segment_str); + int64_t tablet_index_id = 123; // Default index id + int64_t schema_version = 456; // Default schema version + + // Create rowset meta data + MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; + std::string rowset_meta_key = meta_rowset_key(key_info); + + doris::RowsetMetaCloudPB rowset_pb; + rowset_pb.set_rowset_id(0); // useless but required + rowset_pb.set_rowset_id_v2(rowset_id); + rowset_pb.set_num_segments(segment_id + 1); // segment_id is 0-based + rowset_pb.set_tablet_id(tablet_id); + rowset_pb.set_resource_id(resource_id); + rowset_pb.set_creation_time(current_time); + rowset_pb.set_schema_version(schema_version); + rowset_pb.SerializeToString(&val); + + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(rowset_meta_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + // Create tablet index meta data + TabletIndexPB tablet_index; + tablet_index.set_index_id(tablet_index_id); + tablet_index.set_tablet_id(tablet_id); + std::string tablet_index_key = meta_tablet_idx_key({instance_id, tablet_id}); + tablet_index.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_index_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + // Create tablet schema if dealing with index files + if (extension == "idx") { + doris::TabletSchemaCloudPB tablet_schema; + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); + tablet_schema.set_schema_version(schema_version); + + auto index = tablet_schema.add_index(); + index->set_index_id(0); + index->set_index_type(IndexType::INVERTED); + + std::string tablet_schema_key = + meta_schema_key({instance_id, tablet_index_id, schema_version}); + tablet_schema.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_schema_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + } + + accessor->put_file(file_path, ""); + + return 0; +} + +static int create_committed_rowset_by_real_index_v1_file(TxnKv* txn_kv, + StorageVaultAccessor* accessor, + const std::string& resource_id, + const std::string& file_path, + int64_t version = 1) { + std::string val; + std::unique_ptr txn; + + // Parse file path to extract tablet_id and rowset_id + // Expected format: data/{tablet_id}/{rowset_id}_{segment_id}_{index_id}{suffix}.idx + std::vector path_parts; + butil::SplitString(file_path, '/', &path_parts); + + if (path_parts.size() < 3 || path_parts[0] != "data") { + LOG(WARNING) << "Invalid file path format: " << file_path; + return -1; + } + + int64_t tablet_id = std::stoll(path_parts[1]); + std::string filename = path_parts[2]; + + // Extract rowset_id, segment_id, index_id, and suffix from filename + // Format: {rowset_id}_{segment_id}_{index_id}{suffix}.idx + size_t first_underscore_pos = filename.find('_'); + size_t second_underscore_pos = filename.find('_', first_underscore_pos + 1); + size_t dot_pos = filename.find_last_of('.'); + + if (first_underscore_pos == std::string::npos || second_underscore_pos == std::string::npos || + dot_pos == std::string::npos || first_underscore_pos >= second_underscore_pos || + second_underscore_pos >= dot_pos) { + LOG(WARNING) << "Invalid filename format: " << filename; + return -1; + } + + std::string rowset_id = filename.substr(0, first_underscore_pos); + std::string segment_str = filename.substr(first_underscore_pos + 1, + second_underscore_pos - first_underscore_pos - 1); + std::string remaining = + filename.substr(second_underscore_pos + 1, dot_pos - second_underscore_pos - 1); + std::string extension = filename.substr(dot_pos + 1); + + // Parse index_id and suffix from remaining part + // Format: {index_id}{suffix} or just {index_id} + std::string index_id_str = remaining; + std::string index_suffix = ""; + + int segment_id = stoll(segment_str); + int64_t index_id = std::stoll(index_id_str); + int64_t tablet_index_id = 123; // Default tablet index id + int64_t schema_version = 456; // Default schema version + + // Create rowset meta data + MetaRowsetKeyInfo key_info {instance_id, tablet_id, version}; + std::string rowset_meta_key = meta_rowset_key(key_info); + + doris::RowsetMetaCloudPB rowset_pb; + rowset_pb.set_rowset_id(0); // useless but required + rowset_pb.set_rowset_id_v2(rowset_id); + rowset_pb.set_num_segments(segment_id + 1); // segment_id is 0-based + rowset_pb.set_tablet_id(tablet_id); + rowset_pb.set_resource_id(resource_id); + rowset_pb.set_creation_time(current_time); + rowset_pb.set_schema_version(schema_version); + rowset_pb.SerializeToString(&val); + + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(rowset_meta_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + // Create tablet index meta data + TabletIndexPB tablet_index; + tablet_index.set_index_id(tablet_index_id); + tablet_index.set_tablet_id(tablet_id); + std::string tablet_index_key = meta_tablet_idx_key({instance_id, tablet_id}); + tablet_index.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_index_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + + // Create tablet schema if dealing with index files + if (extension == "idx") { + doris::TabletSchemaCloudPB tablet_schema; + tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); + tablet_schema.set_schema_version(schema_version); + + auto index = tablet_schema.add_index(); + index->set_index_id(index_id); + index->set_index_type(IndexType::INVERTED); + if (!index_suffix.empty()) { + index->set_index_suffix_name(index_suffix); + } + + std::string tablet_schema_key = + meta_schema_key({instance_id, tablet_index_id, schema_version}); + tablet_schema.SerializeToString(&val); + if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) { + return -1; + } + txn->put(tablet_schema_key, val); + if (txn->commit() != TxnErrorCode::TXN_OK) { + return -1; + } + } + + accessor->put_file(file_path, ""); + + return 0; +} + static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor, const std::string& resource_id, int64_t tablet_id, int64_t version, int num_segments = 1, @@ -2500,7 +2896,85 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) { ASSERT_NE(checker.do_inverted_check(), 0); } -TEST(CheckerTest, inverted_check_recycle_idx_file) { +TEST(CheckerTest, normal_check_index_file) { + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + + auto sp = SyncPoint::get_instance(); + SyncPoint::CallbackGuard guard; + sp->set_call_back( + "InstanceChecker::do_inverted_check", + [](auto&& args) { + auto* ret = try_any_cast_ret(args); + ret->first = 0; + ret->second = true; + }, + &guard); + sp->enable_processing(); + DORIS_CLOUD_DEFER { + SyncPoint::get_instance()->disable_processing(); + }; + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + ASSERT_EQ(checker.do_inverted_check(), 0); +} + +TEST(CheckerTest, normal_inverted_check_index_file) { + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + + auto sp = SyncPoint::get_instance(); + SyncPoint::CallbackGuard guard; + sp->set_call_back( + "InstanceChecker::do_inverted_check", + [](auto&& args) { + auto* ret = try_any_cast_ret(args); + ret->first = 0; + ret->second = true; + }, + &guard); + sp->enable_processing(); + DORIS_CLOUD_DEFER { + SyncPoint::get_instance()->disable_processing(); + }; + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + ASSERT_EQ(checker.do_inverted_check(), 0); +} + +TEST(CheckerTest, inverted_check_recycle_idx_file_v1) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -2539,34 +3013,207 @@ TEST(CheckerTest, inverted_check_recycle_idx_file) { }); sp->enable_processing(); - for (int t = 10001; t <= 10100; ++t) { - for (int v = 0; v < 10; ++v) { - int ret = create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 1, 3); - ASSERT_EQ(ret, 0) << "Failed to create committed rs: " << ret; + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + } + + size_t delete_kv_num = 5; + std::string meta_rowset_key_begin, meta_rowset_key_end; + meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin); + meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end); + std::vector rowset_key_to_delete; + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + + std::unique_ptr it; + do { + err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it); + while (it->has_next()) { + auto [k, v] = it->next(); + if (rowset_key_to_delete.size() < delete_kv_num) { + rowset_key_to_delete.emplace_back(k); + } + if (!it->has_next()) { + meta_rowset_key_begin = k; + } + } + meta_rowset_key_begin.push_back('\x00'); + } while (it->more()); + + for (const auto& key : rowset_key_to_delete) { + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + txn->remove(key); + err = txn->commit(); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + } + + std::unique_ptr list_iter; + int ret = accessor->list_directory("data", &list_iter); + ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; + + ASSERT_EQ(checker.do_inverted_check(), 1); +} + +TEST(CheckerTest, inverted_check_recycle_idx_file_v2) { + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer((int*)0x01, [&sp](int*) { + sp->clear_all_call_backs(); + sp->disable_processing(); + }); + + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + obj_info->set_ak(config::test_s3_ak); + obj_info->set_sk(config::test_s3_sk); + obj_info->set_endpoint(config::test_s3_endpoint); + obj_info->set_region(config::test_s3_region); + obj_info->set_bucket(config::test_s3_bucket); + obj_info->set_prefix("CheckerTest"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + + sp->set_call_back( + "InstanceRecycler::init_storage_vault_accessors.mock_vault", [&accessor](auto&& args) { + auto* map = try_any_cast< + std::unordered_map>*>( + args[0]); + auto* vault = try_any_cast(args[1]); + if (vault->name() == "test_success_hdfs_vault") { + map->emplace(vault->id(), accessor); + } + }); + sp->enable_processing(); + + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + + size_t delete_kv_num = 5; + std::string meta_rowset_key_begin, meta_rowset_key_end; + meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin); + meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end); + std::vector rowset_key_to_delete; + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + + std::unique_ptr it; + do { + err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it); + while (it->has_next()) { + auto [k, v] = it->next(); + if (rowset_key_to_delete.size() < delete_kv_num) { + rowset_key_to_delete.emplace_back(k); + } + if (!it->has_next()) { + meta_rowset_key_begin = k; + } } + meta_rowset_key_begin.push_back('\x00'); + } while (it->more()); + + for (const auto& key : rowset_key_to_delete) { + std::unique_ptr txn; + TxnErrorCode err = txn_kv->create_txn(&txn); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; + txn->remove(key); + err = txn->commit(); + DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err; } + std::unique_ptr list_iter; int ret = accessor->list_directory("data", &list_iter); ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; - int64_t tablet_id_to_delete_index = -1; + ASSERT_EQ(checker.do_inverted_check(), 1); +} + +TEST(CheckerTest, forward_check_recycle_idx_file_v1) { + auto* sp = SyncPoint::get_instance(); + std::unique_ptr> defer((int*)0x01, [&sp](int*) { + sp->clear_all_call_backs(); + sp->disable_processing(); + }); + + auto txn_kv = std::make_shared(); + ASSERT_EQ(txn_kv->init(), 0); + + InstanceInfoPB instance; + instance.set_instance_id(instance_id); + auto obj_info = instance.add_obj_info(); + obj_info->set_id("1"); + obj_info->set_ak(config::test_s3_ak); + obj_info->set_sk(config::test_s3_sk); + obj_info->set_endpoint(config::test_s3_endpoint); + obj_info->set_region(config::test_s3_region); + obj_info->set_bucket(config::test_s3_bucket); + obj_info->set_prefix("CheckerTest"); + + InstanceChecker checker(txn_kv, instance_id); + ASSERT_EQ(checker.init(instance), 0); + // Add some visible rowsets along with some rowsets that should be recycled + // call inverted check after do recycle which would sweep all the rowsets not visible + auto accessor = checker.accessor_map_.begin()->second; + + sp->set_call_back( + "InstanceRecycler::init_storage_vault_accessors.mock_vault", [&accessor](auto&& args) { + auto* map = try_any_cast< + std::unordered_map>*>( + args[0]); + auto* vault = try_any_cast(args[1]); + if (vault->name() == "test_success_hdfs_vault") { + map->emplace(vault->id(), accessor); + } + }); + sp->enable_processing(); + + for (const auto& file : index_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v1_file_path) { + create_committed_rowset_by_real_index_v1_file(txn_kv.get(), accessor.get(), "1", file); + } + std::unique_ptr list_iter; + int ret = accessor->list_directory("data", &list_iter); + ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; + + int64_t tablet_to_delete = -1; for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) { std::vector str; butil::SplitString(file->path, '/', &str); int64_t tablet_id = atol(str[1].c_str()); - // only delete one index files of ever tablet for mock recycle - // The reason for not select "delete all idx file" is that inverted checking cannot handle this case - // forward checking is required. - if (file->path.ends_with(".idx") && tablet_id_to_delete_index != tablet_id) { + // delete all index files of ever tablet for mock missing + if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) { + tablet_to_delete = tablet_id; accessor->delete_file(file->path); - tablet_id_to_delete_index = tablet_id; } } - ASSERT_EQ(checker.do_inverted_check(), 1); + ASSERT_EQ(checker.do_check(), 1); } -TEST(CheckerTest, forward_check_recycle_idx_file) { +TEST(CheckerTest, forward_check_recycle_idx_file_v2) { auto* sp = SyncPoint::get_instance(); std::unique_ptr> defer((int*)0x01, [&sp](int*) { sp->clear_all_call_backs(); @@ -2605,19 +3252,27 @@ TEST(CheckerTest, forward_check_recycle_idx_file) { }); sp->enable_processing(); - for (int t = 10001; t <= 10100; ++t) { - for (int v = 0; v < 10; ++v) { - create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 1, 3); - } + for (const auto& file : index_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); + } + + for (const auto& file : segment_v2_file_path) { + create_committed_rowset_by_real_index_v2_file(txn_kv.get(), accessor.get(), "1", file); } std::unique_ptr list_iter; int ret = accessor->list_directory("data", &list_iter); ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret; + int64_t tablet_to_delete = -1; for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) { - // delete all index files of ever tablet for mock recycle - if (file->path.ends_with(".idx")) { + std::vector str; + butil::SplitString(file->path, '/', &str); + int64_t tablet_id = atol(str[1].c_str()); + + // delete all index files of ever tablet for mock missing + if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) { accessor->delete_file(file->path); + tablet_to_delete = tablet_id; } } ASSERT_EQ(checker.do_check(), 1); @@ -2637,12 +3292,12 @@ TEST(CheckerTest, normal) { auto accessor = checker.accessor_map_.begin()->second; for (int t = 10001; t <= 10100; ++t) { for (int v = 0; v < 10; ++v) { - create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 1); + create_committed_rowset_with_tablet_schema(txn_kv.get(), accessor.get(), "1", t, v, 1); } } for (int t = 10101; t <= 10200; ++t) { for (int v = 0; v < 10; ++v) { - create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 5); + create_committed_rowset_with_tablet_schema(txn_kv.get(), accessor.get(), "1", t, v, 5); } } ASSERT_EQ(checker.do_check(), 0); @@ -2663,12 +3318,14 @@ TEST(CheckerTest, abnormal) { auto accessor = checker.accessor_map_.begin()->second; for (int t = 10001; t <= 10100; ++t) { for (int v = 0; v < 10; ++v) { - create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 1, 0); + create_committed_rowset_with_tablet_schema(txn_kv.get(), accessor.get(), "1", t, v, 1, + 0); } } for (int t = 10101; t <= 10200; ++t) { for (int v = 0; v < 10; ++v) { - create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 5, 0); + create_committed_rowset_with_tablet_schema(txn_kv.get(), accessor.get(), "1", t, v, 5, + 0); } }