From 3e1b2a74301bd21149e1d8f56e99b18b619b3b73 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Wed, 8 Jan 2025 23:39:15 +0800 Subject: [PATCH 1/2] [fix](meta-service) Read recycle rowset key before removing when commit rowset Test case (hard to make it a regression) ``` mysql -h127.0.0.1 -P8952 -uroot -Ddb1 -e 'drop table if exists t1' mysql -h127.0.0.1 -P8952 -uroot -Ddb1 -e 'CREATE TABLE t1 (id int, name text, score text) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 1' curl -XPOST "localhost:7211/api/update_config?disable_auto_compaction=true" curl "localhost:6000/MetaService/http/v1/injection_point?token=greedisgood9999&op=disable" curl "localhost:6000/MetaService/http/v1/injection_point?token=greedisgood9999&op=clear" for i in $(seq 1 1 100); do mysql -h127.0.0.1 -P8952 -uroot -Ddb1 -e 'insert into t1 values(1,1,1)' done curl "localhost:6000/MetaService/http/v1/injection_point?token=greedisgood9999&op=apply_suite&name=sleep_before_execute_commit_rowset" curl "localhost:6000/MetaService/http/v1/injection_point?token=greedisgood9999&op=enable" curl -XPOST "localhost:7211/api/update_config?disable_auto_compaction=false" sleep 60 instance_id=gavin_debug_instance_doris tablet_id=$(mysql -h127.0.0.1 -P8952 -uroot -Ddb1 -e 'show tablets from t1;' | awk '{if (++n>1) print $1}') curl "localhost:7211/api/compaction/show?tablet_id=${tablet_id}" txn_id=$(curl -s "localhost:6000/MetaService/http/get_value?token=greedisgood9999&unicode&key_type=MetaRowsetKey&instance_id=${instance_id}&tablet_id=${tablet_id}&version=101" | jq ".txn_id" | tr -d '"') curl -s "localhost:6000/MetaService/http/get_value?token=greedisgood9999&unicode&key_type=MetaRowsetTmpKey&instance_id=${instance_id}&tablet_id=${tablet_id}&txn_id=${txn_id}" ``` --- .../src/meta-service/injection_point_http.cpp | 22 ++++++++++++++++++- cloud/src/meta-service/meta_service.cpp | 11 ++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/cloud/src/meta-service/injection_point_http.cpp b/cloud/src/meta-service/injection_point_http.cpp index 18b2ddcebb07fd..7bfa8ddc4d0a2d 100644 --- a/cloud/src/meta-service/injection_point_http.cpp +++ b/cloud/src/meta-service/injection_point_http.cpp @@ -91,6 +91,26 @@ static void register_suites() { } }); }); + + suite_map.emplace("sleep_before_execute_commit_rowset", [] { + int duration_ms = 15000; + std::shared_ptr hit(new std::atomic_long(0)); + LOG(INFO) << "register injection point sleep_before_execute_commit_rowset sleep for " + << duration_ms << " ms"; + auto sp = SyncPoint::get_instance(); + + sp->set_call_back("MetaServiceImpl::commit_rowset", [duration_ms, hit](auto&& args) { + if (hit->fetch_add(1) == 0) { + const CreateRowsetRequest* req = try_any_cast(args[0]); + LOG(INFO) << "hit injection point sleep_before_execute_commit_rowset sleep for " + << duration_ms << " ms, request=" << req->ShortDebugString(); + std::this_thread::sleep_for(std::chrono::milliseconds(duration_ms)); + return; + } + LOG(WARNING) << "injection point sleep_before_execute_commit_rowset without sleep, hit=" + << hit->load(); + }); + }); } bool url_decode(const std::string& in, std::string* out) { @@ -341,4 +361,4 @@ HttpResponse process_injection_point(MetaServiceImpl* service, brpc::Controller* return http_json_reply(MetaServiceCode::INVALID_ARGUMENT, "unknown op:" + op); } -} // namespace doris::cloud \ No newline at end of file +} // namespace doris::cloud diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 7914bf5db11cf6..4d5b7fcf5581d5 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1067,6 +1067,7 @@ void MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle CreateRowsetResponse* response, ::google::protobuf::Closure* done) { RPC_PREPROCESS(commit_rowset); + TEST_INJECTION_POINT_CALLBACK("MetaServiceImpl::commit_rowset", request); if (!request->has_rowset_meta()) { code = MetaServiceCode::INVALID_ARGUMENT; msg = "no rowset meta"; @@ -1166,6 +1167,16 @@ void MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle } auto recycle_rs_key = recycle_rowset_key({instance_id, tablet_id, rowset_id}); + std::string recycle_rs_val; + err = txn->get(recycle_rs_key, &recycle_rs_val); + if (err != TxnErrorCode::TXN_OK) { + std::stringstream ss; + ss << "failed to get recycle_rowset_key when commit rowset instance_id=" << instance_id + << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id; + code = cast_as(err); + msg = ss.str(); + return; + } txn->remove(recycle_rs_key); DCHECK_GT(rowset_meta.txn_expiration(), 0); From 7dda53f4373aaafa73433139d7e98773c375fe15 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Sat, 18 Jan 2025 17:47:17 +0800 Subject: [PATCH 2/2] Fix cloud UTs: commit rowset, loopaddr, recycler JVM memory, resource drop FE --- cloud/script/run_all_tests.sh | 6 +-- cloud/src/meta-service/meta_service.cpp | 2 +- cloud/src/recycler/hdfs_accessor.cpp | 1 + cloud/src/recycler/recycler.cpp | 11 ++++- cloud/test/fdb_injection_test.cpp | 2 +- cloud/test/meta_service_job_test.cpp | 61 +++++++++++++++++++++---- cloud/test/meta_service_test.cpp | 38 ++++++++++++++- cloud/test/network_util_test.cpp | 1 + cloud/test/recycler_test.cpp | 9 +++- cloud/test/resource_test.cpp | 4 +- cloud/test/schema_kv_test.cpp | 3 +- cloud/test/txn_lazy_commit_test.cpp | 55 ++++++++++++++++++++++ 12 files changed, 174 insertions(+), 19 deletions(-) diff --git a/cloud/script/run_all_tests.sh b/cloud/script/run_all_tests.sh index 330fb00449350b..5ff46b1da0be6f 100644 --- a/cloud/script/run_all_tests.sh +++ b/cloud/script/run_all_tests.sh @@ -142,12 +142,12 @@ for i in *_test; do if [[ "${fdb}" != "" ]]; then patchelf --set-rpath "$(pwd)" "${i}" fi - + LLVM_PROFILE_FILE="./report/${i}.profraw" set -euo pipefail if [[ "${filter}" == "" ]]; then - LLVM_PROFILE_FILE="./report/${i}.profraw" "./${i}" --gtest_print_time=true --gtest_output="xml:${i}.xml" + "./${i}" --gtest_print_time=true --gtest_output="xml:${i}.xml" else - LLVM_PROFILE_FILE="./report/${i}.profraw" "./${i}" --gtest_print_time=true --gtest_output="xml:${i}.xml" --gtest_filter="${filter}" + "./${i}" --gtest_print_time=true --gtest_output="xml:${i}.xml" --gtest_filter="${filter}" fi set +euo pipefail unittest_files[${#unittest_files[*]}]="${i}" diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 4d5b7fcf5581d5..1c752345eb6e5e 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -1172,7 +1172,7 @@ void MetaServiceImpl::commit_rowset(::google::protobuf::RpcController* controlle if (err != TxnErrorCode::TXN_OK) { std::stringstream ss; ss << "failed to get recycle_rowset_key when commit rowset instance_id=" << instance_id - << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id; + << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id << " ret=" << err; code = cast_as(err); msg = ss.str(); return; diff --git a/cloud/src/recycler/hdfs_accessor.cpp b/cloud/src/recycler/hdfs_accessor.cpp index 1999bcfa16543a..eff35afb5a1a93 100644 --- a/cloud/src/recycler/hdfs_accessor.cpp +++ b/cloud/src/recycler/hdfs_accessor.cpp @@ -356,6 +356,7 @@ std::string extract_parent_path(const std::string& path) { } int HdfsAccessor::init() { + TEST_SYNC_POINT_RETURN_WITH_VALUE("HdfsAccessor::init", 0); // TODO(plat1ko): Cache hdfsFS fs_ = HDFSBuilder::create_fs(info_.build_conf()); if (!fs_) { diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index 62a161e3edbe52..b54760536fda54 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -1902,7 +1902,7 @@ int InstanceRecycler::recycle_rowsets() { return final_expiration; }; - auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { + auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) -> int { ++num_scanned; total_rowset_key_size += k.size(); total_rowset_value_size += v.size(); @@ -1919,6 +1919,13 @@ int InstanceRecycler::recycle_rowsets() { if (current_time < calc_expiration(rowset)) { // not expired return 0; } + // TODO(gavin): The recycle key is marked as an expired rowset, indicating that it should be + // recycled. However, it may still be referenced by load, SC, or compaction + // processes. + // To resolve this, create a key-value transaction to read and update its + // status to EXPIRED. This will ensure that it conflicts with other commit + // procedures, preventing potential issues. + ++num_expired; expired_rowset_size += v.size(); if (!rowset.has_type()) { // old version `RecycleRowsetPB` @@ -2008,7 +2015,7 @@ int InstanceRecycler::recycle_rowsets() { return 0; }; - int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv), + int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv), std::move(loop_done)); worker_pool->stop(); diff --git a/cloud/test/fdb_injection_test.cpp b/cloud/test/fdb_injection_test.cpp index 60226e7f9521a2..ae5e7307ce6aec 100644 --- a/cloud/test/fdb_injection_test.cpp +++ b/cloud/test/fdb_injection_test.cpp @@ -93,7 +93,7 @@ int main(int argc, char** argv) { [](auto&& args) { *try_any_cast(args[0]) = 0; }); sp->set_call_back("put_schema_kv:schema_key_exists_return", [](auto&& args) { *try_any_cast(args.back()) = true; }); - sp->set_call_back("resource_manager::set_safe_drop_time", + sp->set_call_back("resource_manager::set_safe_drop_time", // make it always safe to drop [](auto&& args) { *try_any_cast(args[0]) = -1; }); meta_service = create_meta_service(); diff --git a/cloud/test/meta_service_job_test.cpp b/cloud/test/meta_service_job_test.cpp index e1dbb0089e2e73..d031c2bbdb8dbf 100644 --- a/cloud/test/meta_service_job_test.cpp +++ b/cloud/test/meta_service_job_test.cpp @@ -103,6 +103,14 @@ doris::RowsetMetaCloudPB create_rowset(int64_t tablet_id, int64_t start_version, return rowset; } +static void prepare_rowset(MetaServiceProxy* meta_service, const doris::RowsetMetaCloudPB& rowset, + CreateRowsetResponse& res) { + brpc::Controller cntl; + CreateRowsetRequest req; + req.mutable_rowset_meta()->CopyFrom(rowset); + meta_service->prepare_rowset(&cntl, &req, &res, nullptr); +} + void commit_rowset(MetaService* meta_service, const doris::RowsetMetaCloudPB& rowset, CreateRowsetResponse& res) { brpc::Controller cntl; @@ -1402,6 +1410,9 @@ TEST(MetaServiceJobTest, SchemaChangeJobTest) { for (int64_t i = 0; i < 5; ++i) { output_rowsets.push_back(create_rowset(new_tablet_id, i + 2, i + 2)); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), output_rowsets.back(), res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), output_rowsets.back(), res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << i; } @@ -1479,6 +1490,9 @@ TEST(MetaServiceJobTest, SchemaChangeJobTest) { output_rowsets.push_back(create_rowset(new_tablet_id, 13, 13)); for (auto& rs : output_rowsets) { CreateRowsetResponse res; + prepare_rowset(meta_service.get(), rs, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), rs, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << rs.end_version(); } @@ -1615,8 +1629,12 @@ TEST(MetaServiceJobTest, RetrySchemaChangeJobTest) { be1_output_rowsets.push_back(create_rowset(new_tablet_id, 11, 11)); for (auto& rs : be1_output_rowsets) { CreateRowsetResponse res; + prepare_rowset(meta_service.get(), rs, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), rs, res); - ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << rs.end_version(); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK) + << "end_version=" << rs.end_version() << res.status().ShortDebugString(); } sc_res.Clear(); // FE thinks BE1 is not alive and retries "job2" on BE2, should preempt "job2" created by BE1 @@ -1628,17 +1646,28 @@ TEST(MetaServiceJobTest, RetrySchemaChangeJobTest) { { CreateRowsetResponse res; // [2-8] has committed by BE1 - commit_rowset(meta_service.get(), create_rowset(new_tablet_id, 2, 8), res); + auto rs_meta = create_rowset(new_tablet_id, 2, 8); + prepare_rowset(meta_service.get(), rs_meta, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::ALREADY_EXISTED) + << res.status().ShortDebugString(); + res.Clear(); + commit_rowset(meta_service.get(), rs_meta, res); ASSERT_EQ(res.status().code(), MetaServiceCode::ALREADY_EXISTED); ASSERT_TRUE(res.has_existed_rowset_meta()); ASSERT_EQ(res.existed_rowset_meta().rowset_id_v2(), be1_output_rowsets[0].rowset_id_v2()); be2_output_rowsets.push_back(res.existed_rowset_meta()); res.Clear(); be2_output_rowsets.push_back(create_rowset(new_tablet_id, 9, 12)); + prepare_rowset(meta_service.get(), be2_output_rowsets.back(), res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), be2_output_rowsets.back(), res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); res.Clear(); be2_output_rowsets.push_back(create_rowset(new_tablet_id, 13, 13)); + prepare_rowset(meta_service.get(), be2_output_rowsets.back(), res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), be2_output_rowsets.back(), res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1760,6 +1789,9 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { for (int64_t i = 0; i < 5; ++i) { output_rowsets.push_back(create_rowset(new_tablet_id, i + 2, i + 2)); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), output_rowsets.back(), res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), output_rowsets.back(), res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << i; } @@ -1808,6 +1840,9 @@ TEST(MetaServiceJobTest, SchemaChangeJobWithMoWTest) { for (int64_t i = 0; i < 5; ++i) { output_rowsets.push_back(create_rowset(new_tablet_id, i + 2, i + 2)); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), output_rowsets.back(), res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), output_rowsets.back(), res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << i; } @@ -1959,9 +1994,14 @@ TEST(MetaServiceJobTest, ConcurrentCompactionTest) { { // Provide output rowset auto output_rowset = create_rowset(tablet_id, 5, 10); - CreateRowsetResponse rowset_res; - commit_rowset(meta_service.get(), output_rowset, rowset_res); - ASSERT_EQ(rowset_res.status().code(), MetaServiceCode::OK); + { + CreateRowsetResponse res; + prepare_rowset(meta_service.get(), output_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); + commit_rowset(meta_service.get(), output_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + } FinishTabletJobRequest req; FinishTabletJobResponse res; @@ -2071,9 +2111,14 @@ TEST(MetaServiceJobTest, ConcurrentCompactionTest) { { // Provide output rowset auto output_rowset = create_rowset(tablet_id, 2, 4); - CreateRowsetResponse rowset_res; - commit_rowset(meta_service.get(), output_rowset, rowset_res); - ASSERT_EQ(rowset_res.status().code(), MetaServiceCode::OK); + { + CreateRowsetResponse res; + prepare_rowset(meta_service.get(), output_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); + commit_rowset(meta_service.get(), output_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + } FinishTabletJobRequest req; FinishTabletJobResponse res; diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index fb17c29629b04e..23831ead0497e2 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -181,6 +181,9 @@ static void commit_txn(MetaServiceProxy* meta_service, int64_t db_id, int64_t tx ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << label; } +/** + * Generates a rowset meta represented by RowsetMetaCloudPB + */ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id, int partition_id = 10, int64_t version = -1, int num_rows = 100) { @@ -1724,6 +1727,9 @@ TEST(MetaServiceTest, CommitTxnTest) { create_tablet(meta_service.get(), 1234, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1822,6 +1828,9 @@ TEST(MetaServiceTest, CommitTxnExpiredTest) { create_tablet(meta_service.get(), 1234789234, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1850,6 +1859,9 @@ static void create_and_commit_rowset(MetaServiceProxy* meta_service, int64_t tab create_tablet(meta_service, table_id, index_id, partition_id, tablet_id); auto tmp_rowset = create_rowset(txn_id, tablet_id, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service, tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service, tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -2443,6 +2455,9 @@ TEST(MetaServiceTest, AbortTxnTest) { create_tablet(meta_service.get(), 12345, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -2493,8 +2508,11 @@ TEST(MetaServiceTest, AbortTxnTest) { create_tablet(meta_service.get(), table_id, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; - commit_rowset(meta_service.get(), tmp_rowset, res); + prepare_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); + commit_rowset(meta_service.get(), tmp_rowset, res); + EXPECT_EQ(res.status().code(), MetaServiceCode::OK) << res.status().ShortDebugString(); } // abort txn by db_id and label @@ -2683,6 +2701,9 @@ TEST(MetaServiceTest, CheckTxnConflictTest) { create_tablet(meta_service.get(), table_id, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -2954,6 +2975,9 @@ TEST(MetaServiceTest, CleanTxnLabelTest) { create_tablet(meta_service.get(), 1234, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -3207,6 +3231,9 @@ TEST(MetaServiceTest, CleanTxnLabelTest) { create_tablet(meta_service.get(), 1234, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -3275,6 +3302,9 @@ TEST(MetaServiceTest, CleanTxnLabelTest) { create_tablet(meta_service.get(), 1234, 1235, 1236, tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -5435,6 +5465,9 @@ TEST(MetaServiceTest, DeleteBimapCommitTxnTest) { auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); tmp_rowset.set_partition_id(partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -5574,6 +5607,9 @@ TEST(MetaServiceTest, WrongPendingBitmapTest) { auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i); tmp_rowset.set_partition_id(partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } diff --git a/cloud/test/network_util_test.cpp b/cloud/test/network_util_test.cpp index 06f425f244f859..0ee3f7c318f7a7 100644 --- a/cloud/test/network_util_test.cpp +++ b/cloud/test/network_util_test.cpp @@ -28,6 +28,7 @@ int main(int argc, char** argv) { TEST(NetWorkUtilTest, GetLocaHostTest) { doris::cloud::config::priority_networks = ""; + doris::cloud::config::enable_loopback_address_for_ms = true; // prepare an existed ip for test auto ip = doris::cloud::get_local_ip(doris::cloud::config::priority_networks); std::cout << "get ip: " << ip << " from butil::my_ip_cstr()" << std::endl; diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index 567d27f5d6f3c4..953fc19aaab10f 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -3315,6 +3315,12 @@ TEST(RecyclerTest, init_vault_accessor_failed_test) { txn->put(storage_vault_key({instance.instance_id(), "2"}), vault.SerializeAsString()); } + // ATTN(gavin): we have to inject this point, otherwise it may complain ASAN memory issue of JVM + sp->set_call_back("HdfsAccessor::init", [](auto&& args) { + auto* ret = try_any_cast*>(args[0]); + ret->first = -1; // return -1, make it fail + ret->second = true; + }); // failed to init because accessor->init() fails { HdfsBuildConf hdfs_build_conf; @@ -3354,7 +3360,8 @@ TEST(RecyclerTest, init_vault_accessor_failed_test) { }); sp->enable_processing(); - // succeed to init MockAccessor + // succeed to init MockAccessor, sync point "HdfsAccessor::init" is set to return -1 but it is + // ignored by sync point "InstanceRecycler::init_storage_vault_accessors.mock_vault" { HdfsBuildConf hdfs_build_conf; StorageVaultPB vault; diff --git a/cloud/test/resource_test.cpp b/cloud/test/resource_test.cpp index c8d53408a048e1..599e5245020655 100644 --- a/cloud/test/resource_test.cpp +++ b/cloud/test/resource_test.cpp @@ -209,7 +209,7 @@ static void drop_cluster(MetaServiceProxy* ms, const std::string& instance_id, req.set_op(AlterClusterRequest::DROP_CLUSTER); AlterClusterResponse res; ms->alter_cluster(&cntl, &req, &res, brpc::DoNothing()); - ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK) << res.status().ShortDebugString(); } // test cluster's node addr use ip @@ -400,6 +400,8 @@ TEST(ResourceTest, AddDropCluster) { auto* ret = try_any_cast(args[1]); *ret = 0; }); + sp->set_call_back("resource_manager::set_safe_drop_time", // make it always safe to drop + [](auto&& args) { *try_any_cast(args[0]) = -1; }); sp->enable_processing(); auto meta_service = get_meta_service(); diff --git a/cloud/test/schema_kv_test.cpp b/cloud/test/schema_kv_test.cpp index 52e54f5e494b7f..b1c5fff72e5913 100644 --- a/cloud/test/schema_kv_test.cpp +++ b/cloud/test/schema_kv_test.cpp @@ -540,7 +540,8 @@ TEST(DetachSchemaKVTest, RowsetTest) { auto schema_version = get_rowset_res->rowset_meta(10).schema_version(); get_rowset_res->mutable_rowset_meta(10)->mutable_tablet_schema()->set_schema_version(3); EXPECT_EQ(get_rowset_res->rowset_meta(10).tablet_schema().SerializeAsString(), - schema->SerializeAsString()); + schema->SerializeAsString()) + << "table_id=" << table_id; get_rowset_res->mutable_rowset_meta(10)->mutable_tablet_schema()->set_schema_version( schema_version); } diff --git a/cloud/test/txn_lazy_commit_test.cpp b/cloud/test/txn_lazy_commit_test.cpp index 0f284508a3f34e..f37f84d0ff8726 100644 --- a/cloud/test/txn_lazy_commit_test.cpp +++ b/cloud/test/txn_lazy_commit_test.cpp @@ -174,6 +174,16 @@ static doris::RowsetMetaCloudPB create_rowset(int64_t txn_id, int64_t tablet_id, return rowset; } +static void prepare_rowset(MetaServiceProxy* meta_service, const doris::RowsetMetaCloudPB& rowset, + CreateRowsetResponse& res) { + brpc::Controller cntl; + auto arena = res.GetArena(); + auto req = google::protobuf::Arena::CreateMessage(arena); + req->mutable_rowset_meta()->CopyFrom(rowset); + meta_service->prepare_rowset(&cntl, req, &res, nullptr); + if (!arena) delete req; +} + static void commit_rowset(MetaServiceProxy* meta_service, const doris::RowsetMetaCloudPB& rowset, CreateRowsetResponse& res) { brpc::Controller cntl; @@ -424,6 +434,9 @@ TEST(TxnLazyCommitTest, RepairTabletIndexTest) { auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); tmp_rowsets_meta.push_back(std::make_pair("mock_tmp_rowset_key", tmp_rowset)); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -511,6 +524,9 @@ TEST(TxnLazyCommitTest, CommitTxnEventuallyWithoutDbIdTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -592,6 +608,9 @@ TEST(TxnLazyCommitTest, CommitTxnEventuallyWithAbortedTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -701,6 +720,9 @@ TEST(TxnLazyCommitTest, CommitTxnEventuallyWithDbIdTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -779,6 +801,9 @@ TEST(TxnLazyCommitTest, CommitTxnImmediatelyTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -867,6 +892,9 @@ TEST(TxnLazyCommitTest, NotFallThroughCommitTxnEventuallyTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -955,6 +983,9 @@ TEST(TxnLazyCommitTest, FallThroughCommitTxnEventuallyTest) { tablet_id_base + i); auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1112,6 +1143,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase1Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id1, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1159,6 +1193,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase1Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id2, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1350,6 +1387,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase2Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id1, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1397,6 +1437,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase2Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id2, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1507,6 +1550,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase3Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(tmp_txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1616,6 +1662,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase3Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id1, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1767,6 +1816,9 @@ TEST(TxnLazyCommitTest, ConcurrentCommitTxnEventuallyCase4Test) { for (int i = 0; i < 10; ++i) { auto tmp_rowset = create_rowset(txn_id, tablet_id_base + i, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); } @@ -1852,6 +1904,9 @@ TEST(TxnLazyCommitTest, RowsetMetaSizeExceedTest) { { auto tmp_rowset = create_rowset(tmp_txn_id, tablet_id, partition_id); CreateRowsetResponse res; + prepare_rowset(meta_service.get(), tmp_rowset, res); + ASSERT_EQ(res.status().code(), MetaServiceCode::OK); + res.Clear(); commit_rowset(meta_service.get(), tmp_rowset, res); ASSERT_EQ(res.status().code(), MetaServiceCode::OK); }