From 43b2fdee9bc4aa426bb57d0bf1b3a76bc742330d Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Tue, 5 Nov 2024 14:31:21 +0800 Subject: [PATCH 1/6] [fix](cloud) Check instance_id valid when use cloud_unique_id degrade format --- cloud/src/common/config.h | 2 + cloud/src/meta-service/meta_service.cpp | 49 ++++++++++++++----- .../src/resource-manager/resource_manager.cpp | 2 +- cloud/test/meta_service_test.cpp | 16 ++++++ 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index da6ae113cd7349..af8b8f90469ff3 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -219,4 +219,6 @@ CONF_Int32(max_tablet_index_num_per_batch, "1000"); // Max aborted txn num for the same label name CONF_mInt64(max_num_aborted_txn, "100"); + +CONF_Bool(enable_check_cloud_unique_id_degrade_format, "true"); } // namespace doris::cloud::config diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index dfa06ca5fa08dc..905a27b043838a 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -88,6 +88,7 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, std::vector nodes; std::string err = rc_mgr->get_node(cloud_unique_id, &nodes); { TEST_SYNC_POINT_CALLBACK("get_instance_id_err", &err); } + std::string instance_id; if (!err.empty()) { // cache can't find cloud_unique_id, so degraded by parse cloud_unique_id // cloud_unique_id encode: ${version}:${instance_id}:${unique_id} @@ -97,8 +98,8 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, for (int i = 0; i < vec.size(); ++i) { ss << "idx " << i << "= [" << vec[i] << "] "; } - LOG(INFO) << "degraded to get instance_id, cloud_unique_id: " << cloud_unique_id - << "after split: " << ss.str(); + LOG_EVERY_N(INFO, 100) << "degraded to get instance_id, cloud_unique_id: " + << cloud_unique_id << "after split: " << ss.str(); if (vec.size() != 3) { LOG(WARNING) << "cloud unique id is not degraded format, failed to check instance " "info, cloud_unique_id=" @@ -106,28 +107,52 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, return ""; } // version: vec[0], instance_id: vec[1], unique_id: vec[2] - switch (std::atoi(vec[0].c_str())) { - case 1: - // just return instance id; - return vec[1]; - default: + int version = std::atoi(vec[0].c_str()); + if (version != 1) { LOG(WARNING) << "cloud unique id degraded state, but version not eq configure, " "cloud_unique_id=" << cloud_unique_id << ", err=" << err; return ""; } + instance_id = vec[1]; + LOG_EVERY_N(INFO, 100) << "use degraded to get instance_id, instance_id=" << instance_id; + // check instance_id valid by get fdb + if (config::enable_check_cloud_unique_id_degrade_format) { + // check kv + auto [c0, m0] = rc_mgr->get_instance(nullptr, instance_id, nullptr); + { TEST_SYNC_POINT_CALLBACK("get_instance", &c0); } + if (c0 != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "check instance instance_id=" << instance_id + << " failed, code=" << format_as(c0) << ", info=" + m0; + return ""; + } + } + return instance_id; } - std::string instance_id; - for (auto& i : nodes) { - if (!instance_id.empty() && instance_id != i.instance_id) { + for (auto& node : nodes) { + if (!instance_id.empty() && instance_id != node.instance_id) { LOG(WARNING) << "cloud_unique_id is one-to-many instance_id, " << " cloud_unique_id=" << cloud_unique_id << " current_instance_id=" << instance_id - << " later_instance_id=" << i.instance_id; + << " later_instance_id=" << node.instance_id; + } + instance_id = node.instance_id; // The last wins + // check cache unique_id + std::string cloud_unique_id_in_cache = node.node_info.cloud_unique_id(); + auto v = split(cloud_unique_id, ':'); + if (v.size() != 3) continue; + // degraded format check it + int version = std::atoi(v[0].c_str()); + if (version != 1 || v[1] != node.instance_id || v[1] != instance_id) { + LOG(WARNING) << "in cache, node=" << node.node_info.DebugString() + << ", cloud_unique_id=" << cloud_unique_id + << " current_instance_id=" << instance_id + << ", later_instance_id=" << node.instance_id; + continue; } - instance_id = i.instance_id; // The last wins } + return instance_id; } diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index 43f0a7368d812b..864f5bacb0cc3c 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -624,7 +624,7 @@ std::pair ResourceManager::get_instance(std::shared_p return ec; } - if (!inst_pb->ParseFromString(val)) { + if (inst_pb != nullptr && !inst_pb->ParseFromString(val)) { code = TxnErrorCode::TXN_UNIDENTIFIED_ERROR; msg = "failed to parse InstanceInfoPB"; return ec; diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index ee90e604e1c5f6..4254633ca42442 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -56,6 +56,7 @@ int main(int argc, char** argv) { config::enable_txn_store_retry = true; config::txn_store_retry_base_intervals_ms = 1; config::txn_store_retry_times = 20; + config::enable_check_cloud_unique_id_degrade_format = false; if (!doris::cloud::init_glog("meta_service_test")) { std::cerr << "failed to init glog" << std::endl; @@ -264,6 +265,21 @@ TEST(MetaServiceTest, GetInstanceIdTest) { "12345678901:ALBJLH4Q:m-n3qdpyal27rh8iprxx"); ASSERT_EQ(instance_id, ""); + config::enable_check_cloud_unique_id_degrade_format = true; + auto ms = get_meta_service(false); + instance_id = + get_instance_id(ms->resource_mgr(), "1:ALBJLH4Q-check-invalid:m-n3qdpyal27rh8iprxx"); + ASSERT_EQ(instance_id, ""); + + sp->set_call_back("get_instance", [&](auto&& args) { + TxnErrorCode* c0 = try_any_cast(args[0]); + *c0 = TxnErrorCode::TXN_OK; + }); + instance_id = + get_instance_id(ms->resource_mgr(), "1:ALBJLH4Q-check-invalid:m-n3qdpyal27rh8iprxx"); + ASSERT_EQ(instance_id, "ALBJLH4Q-check-invalid"); + config::enable_check_cloud_unique_id_degrade_format = false; + sp->clear_all_call_backs(); sp->clear_trace(); sp->disable_processing(); From 59206f3eb1f07d34cb6090dc7f1fe2afb6adea8e Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Tue, 5 Nov 2024 17:09:07 +0800 Subject: [PATCH 2/6] fix --- cloud/src/meta-service/meta_service.cpp | 51 ++++++------------- .../meta-service/meta_service_resource.cpp | 11 ++++ .../src/resource-manager/resource_manager.cpp | 23 +++++++++ cloud/src/resource-manager/resource_manager.h | 19 +++++++ cloud/test/meta_service_test.cpp | 2 +- 5 files changed, 70 insertions(+), 36 deletions(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 905a27b043838a..2d90815b99c81a 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -93,39 +93,19 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, // cache can't find cloud_unique_id, so degraded by parse cloud_unique_id // cloud_unique_id encode: ${version}:${instance_id}:${unique_id} // check it split by ':' c - auto vec = split(cloud_unique_id, ':'); - std::stringstream ss; - for (int i = 0; i < vec.size(); ++i) { - ss << "idx " << i << "= [" << vec[i] << "] "; - } - LOG_EVERY_N(INFO, 100) << "degraded to get instance_id, cloud_unique_id: " - << cloud_unique_id << "after split: " << ss.str(); - if (vec.size() != 3) { - LOG(WARNING) << "cloud unique id is not degraded format, failed to check instance " - "info, cloud_unique_id=" - << cloud_unique_id << " , err=" << err; + auto [valid, instance_id] = rc_mgr->get_instance_id_by_degrade_unique_id(cloud_unique_id); + if (!valid) { + LOG(WARNING) << "use degraded format cloud_unique_id, but cloud_unique_id not degrade " + "format, cloud_unique_id=" + << cloud_unique_id; return ""; } - // version: vec[0], instance_id: vec[1], unique_id: vec[2] - int version = std::atoi(vec[0].c_str()); - if (version != 1) { - LOG(WARNING) << "cloud unique id degraded state, but version not eq configure, " - "cloud_unique_id=" - << cloud_unique_id << ", err=" << err; - return ""; - } - instance_id = vec[1]; - LOG_EVERY_N(INFO, 100) << "use degraded to get instance_id, instance_id=" << instance_id; + // check instance_id valid by get fdb - if (config::enable_check_cloud_unique_id_degrade_format) { - // check kv - auto [c0, m0] = rc_mgr->get_instance(nullptr, instance_id, nullptr); - { TEST_SYNC_POINT_CALLBACK("get_instance", &c0); } - if (c0 != TxnErrorCode::TXN_OK) { - LOG(WARNING) << "check instance instance_id=" << instance_id - << " failed, code=" << format_as(c0) << ", info=" + m0; - return ""; - } + if (config::enable_check_cloud_unique_id_degrade_format && + !rc_mgr->check_degrade_instance_valid(instance_id)) { + LOG(WARNING) << "use degraded format cloud_unique_id, but check instance failed"; + return ""; } return instance_id; } @@ -140,11 +120,12 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, instance_id = node.instance_id; // The last wins // check cache unique_id std::string cloud_unique_id_in_cache = node.node_info.cloud_unique_id(); - auto v = split(cloud_unique_id, ':'); - if (v.size() != 3) continue; - // degraded format check it - int version = std::atoi(v[0].c_str()); - if (version != 1 || v[1] != node.instance_id || v[1] != instance_id) { + auto [valid, id] = rc_mgr->get_instance_id_by_degrade_unique_id(cloud_unique_id_in_cache); + if (!valid) { + continue; + } + + if (id != node.instance_id || id != instance_id) { LOG(WARNING) << "in cache, node=" << node.node_info.DebugString() << ", cloud_unique_id=" << cloud_unique_id << " current_instance_id=" << instance_id diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 399e0964f4da1d..27010606155ec1 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -1922,6 +1922,7 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, RPC_PREPROCESS(alter_cluster); std::string cloud_unique_id = request->has_cloud_unique_id() ? request->cloud_unique_id() : ""; instance_id = request->has_instance_id() ? request->instance_id() : ""; + bool is_degrade_format = false; if (!cloud_unique_id.empty() && instance_id.empty()) { instance_id = get_instance_id(resource_mgr_, cloud_unique_id); if (instance_id.empty()) { @@ -1930,6 +1931,16 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, LOG(INFO) << msg << ", cloud_unique_id=" << cloud_unique_id; return; } + auto [is_degrade, _] = resource_mgr_->get_instance_id_by_degrade_unique_id(cloud_unique_id); + is_degrade_format = is_degrade; + } + + if (config::enable_check_cloud_unique_id_degrade_format && is_degrade_format && + !resource_mgr_->check_degrade_instance_valid(instance_id)) { + msg = "use degrade cloud_unique_id, but instance_id invalid, cloud_unique_id=" + + cloud_unique_id; + LOG(WARNING) << msg; + code = MetaServiceCode::INVALID_ARGUMENT; } if (instance_id.empty() || !request->has_cluster()) { diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index 864f5bacb0cc3c..ef743aaadc9750 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -23,6 +23,7 @@ #include #include "common/logging.h" +#include "common/string_util.h" #include "common/util.h" #include "cpp/sync_point.h" #include "meta-service/keys.h" @@ -199,6 +200,28 @@ bool ResourceManager::check_cluster_params_valid(const ClusterPB& cluster, std:: return no_err; } +std::pair ResourceManager::get_instance_id_by_degrade_unique_id( + const std::string& cloud_unique_id) { + auto v = split(cloud_unique_id, ':'); + if (v.size() != 3) return {false, ""}; + // degraded format check it + int version = std::atoi(v[0].c_str()); + if (version != 1) return {false, ""}; + return {true, v[1]}; +} + +bool ResourceManager::check_degrade_instance_valid(const std::string& instance_id) { + // check kv + auto [c0, m0] = get_instance(nullptr, instance_id, nullptr); + { TEST_SYNC_POINT_CALLBACK("check_degrade_instance_valid", &c0); } + if (c0 != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "check instance instance_id=" << instance_id + << " failed, code=" << format_as(c0) << ", info=" + m0; + return false; + } + return true; +} + std::pair ResourceManager::add_cluster(const std::string& instance_id, const ClusterInfo& cluster) { std::string msg; diff --git a/cloud/src/resource-manager/resource_manager.h b/cloud/src/resource-manager/resource_manager.h index 5000764dee8a0b..c45b019adc68a5 100644 --- a/cloud/src/resource-manager/resource_manager.h +++ b/cloud/src/resource-manager/resource_manager.h @@ -114,6 +114,25 @@ class ResourceManager { bool check_cluster_params_valid(const ClusterPB& cluster, std::string* err, bool check_master_num); + /** + * Check cloud_unique_id is degraded format, and get instance_id from cloud_unique_id + * degraded format : "${version}:${instance_id}:${unique_id}" + * @param degraded cloud_unique_id + * + * @return a pair, if is_degraded_format == true , instance_id, if is_degraded_format == false, instance_id="" + */ + std::pair get_instance_id_by_degrade_unique_id( + const std::string& cloud_unique_id); + + /** + * check instance_id is a valid instance, check by get fdb kv + * + * @param instance_id + * + * @return true, instance_id in fdb kv + */ + bool check_degrade_instance_valid(const std::string& instance_id); + /** * Refreshes the cache of given instance. This process removes the instance in cache * and then replaces it with persisted instance state read from underlying KV storage. diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index 4254633ca42442..a579148295f261 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -271,7 +271,7 @@ TEST(MetaServiceTest, GetInstanceIdTest) { get_instance_id(ms->resource_mgr(), "1:ALBJLH4Q-check-invalid:m-n3qdpyal27rh8iprxx"); ASSERT_EQ(instance_id, ""); - sp->set_call_back("get_instance", [&](auto&& args) { + sp->set_call_back("check_degrade_instance_valid", [&](auto&& args) { TxnErrorCode* c0 = try_any_cast(args[0]); *c0 = TxnErrorCode::TXN_OK; }); From 6d738706c1f45e484b221b9e0231b888b8c8193b Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Tue, 5 Nov 2024 21:01:05 +0800 Subject: [PATCH 3/6] fix --- .../meta-service/meta_service_resource.cpp | 29 +++++++++---------- .../src/resource-manager/resource_manager.cpp | 11 +++++++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 27010606155ec1..392ce0de4ee2ec 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -1922,8 +1922,17 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, RPC_PREPROCESS(alter_cluster); std::string cloud_unique_id = request->has_cloud_unique_id() ? request->cloud_unique_id() : ""; instance_id = request->has_instance_id() ? request->instance_id() : ""; - bool is_degrade_format = false; if (!cloud_unique_id.empty() && instance_id.empty()) { + auto [is_degraded_format, id] = + resource_mgr_->get_instance_id_by_degrade_unique_id(cloud_unique_id); + if (config::enable_check_cloud_unique_id_degrade_format && is_degraded_format && + !resource_mgr_->check_degrade_instance_valid(id)) { + msg = "use degrade cloud_unique_id, but instance_id invalid, cloud_unique_id=" + + cloud_unique_id; + LOG(WARNING) << msg; + code = MetaServiceCode::INVALID_ARGUMENT; + return; + } instance_id = get_instance_id(resource_mgr_, cloud_unique_id); if (instance_id.empty()) { code = MetaServiceCode::INVALID_ARGUMENT; @@ -1931,16 +1940,6 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, LOG(INFO) << msg << ", cloud_unique_id=" << cloud_unique_id; return; } - auto [is_degrade, _] = resource_mgr_->get_instance_id_by_degrade_unique_id(cloud_unique_id); - is_degrade_format = is_degrade; - } - - if (config::enable_check_cloud_unique_id_degrade_format && is_degrade_format && - !resource_mgr_->check_degrade_instance_valid(instance_id)) { - msg = "use degrade cloud_unique_id, but instance_id invalid, cloud_unique_id=" + - cloud_unique_id; - LOG(WARNING) << msg; - code = MetaServiceCode::INVALID_ARGUMENT; } if (instance_id.empty() || !request->has_cluster()) { @@ -1983,7 +1982,7 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, case AlterClusterRequest::ADD_NODE: { resource_mgr_->check_cluster_params_valid(request->cluster(), &msg, false); if (msg != "") { - LOG(INFO) << msg; + LOG(WARNING) << msg; break; } std::vector to_add; @@ -2007,7 +2006,7 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, case AlterClusterRequest::DROP_NODE: { resource_mgr_->check_cluster_params_valid(request->cluster(), &msg, false); if (msg != "") { - LOG(INFO) << msg; + LOG(WARNING) << msg; break; } std::vector to_add; @@ -2030,7 +2029,7 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, case AlterClusterRequest::DECOMMISSION_NODE: { resource_mgr_->check_cluster_params_valid(request->cluster(), &msg, false); if (msg != "") { - LOG(INFO) << msg; + LOG(WARNING) << msg; break; } @@ -2092,7 +2091,7 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, case AlterClusterRequest::NOTIFY_DECOMMISSIONED: { resource_mgr_->check_cluster_params_valid(request->cluster(), &msg, false); if (msg != "") { - LOG(INFO) << msg; + LOG(WARNING) << msg; break; } diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index ef743aaadc9750..e60bfbbd9e263b 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -160,6 +160,17 @@ bool ResourceManager::check_cluster_params_valid(const ClusterPB& cluster, std:: int master_num = 0; int follower_num = 0; for (auto& n : cluster.nodes()) { + // check here cloud_unique_id + std::string cloud_unique_id = n.cloud_unique_id(); + auto [is_degrade_format, instance_id] = + get_instance_id_by_degrade_unique_id(cloud_unique_id); + if (config::enable_check_cloud_unique_id_degrade_format && is_degrade_format && + !check_degrade_instance_valid(instance_id)) { + ss << "node=" << n.DebugString() + << " cloud_unique_id use degrade format, but check instance failed"; + *err = ss.str(); + return false; + } if (ClusterPB::SQL == cluster.type() && n.has_edit_log_port() && n.edit_log_port() && n.has_node_type() && (n.node_type() == NodeInfoPB_NodeType_FE_MASTER || From 7f173bcae5eda237600ed19d06849dc289390485 Mon Sep 17 00:00:00 2001 From: deardeng Date: Wed, 6 Nov 2024 16:56:01 +0800 Subject: [PATCH 4/6] fix review --- cloud/src/common/config.h | 2 +- cloud/src/meta-service/meta_service.cpp | 10 +++++----- .../meta-service/meta_service_resource.cpp | 6 +++--- .../src/resource-manager/resource_manager.cpp | 20 +++++++++---------- cloud/src/resource-manager/resource_manager.h | 4 ++-- cloud/test/meta_service_test.cpp | 8 ++++---- 6 files changed, 24 insertions(+), 26 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index af8b8f90469ff3..89862a74b144f0 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -220,5 +220,5 @@ CONF_Int32(max_tablet_index_num_per_batch, "1000"); // Max aborted txn num for the same label name CONF_mInt64(max_num_aborted_txn, "100"); -CONF_Bool(enable_check_cloud_unique_id_degrade_format, "true"); +CONF_Bool(enable_check_instance_id, "true"); } // namespace doris::cloud::config diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 2d90815b99c81a..2f1d8bf473fdc8 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -93,7 +93,7 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, // cache can't find cloud_unique_id, so degraded by parse cloud_unique_id // cloud_unique_id encode: ${version}:${instance_id}:${unique_id} // check it split by ':' c - auto [valid, instance_id] = rc_mgr->get_instance_id_by_degrade_unique_id(cloud_unique_id); + auto [valid, id] = ResourceManager::get_instance_id_by_cloud_unique_id(cloud_unique_id); if (!valid) { LOG(WARNING) << "use degraded format cloud_unique_id, but cloud_unique_id not degrade " "format, cloud_unique_id=" @@ -102,12 +102,11 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, } // check instance_id valid by get fdb - if (config::enable_check_cloud_unique_id_degrade_format && - !rc_mgr->check_degrade_instance_valid(instance_id)) { + if (config::enable_check_instance_id && !rc_mgr->is_instance_id_registered(id)) { LOG(WARNING) << "use degraded format cloud_unique_id, but check instance failed"; return ""; } - return instance_id; + return id; } for (auto& node : nodes) { @@ -120,7 +119,8 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, instance_id = node.instance_id; // The last wins // check cache unique_id std::string cloud_unique_id_in_cache = node.node_info.cloud_unique_id(); - auto [valid, id] = rc_mgr->get_instance_id_by_degrade_unique_id(cloud_unique_id_in_cache); + auto [valid, id] = + ResourceManager::get_instance_id_by_cloud_unique_id(cloud_unique_id_in_cache); if (!valid) { continue; } diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 392ce0de4ee2ec..3cdba8f1885e44 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -1924,9 +1924,9 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, instance_id = request->has_instance_id() ? request->instance_id() : ""; if (!cloud_unique_id.empty() && instance_id.empty()) { auto [is_degraded_format, id] = - resource_mgr_->get_instance_id_by_degrade_unique_id(cloud_unique_id); - if (config::enable_check_cloud_unique_id_degrade_format && is_degraded_format && - !resource_mgr_->check_degrade_instance_valid(id)) { + ResourceManager::get_instance_id_by_cloud_unique_id(cloud_unique_id); + if (config::enable_check_instance_id && is_degraded_format && + !resource_mgr_->is_instance_id_registered(id)) { msg = "use degrade cloud_unique_id, but instance_id invalid, cloud_unique_id=" + cloud_unique_id; LOG(WARNING) << msg; diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index e60bfbbd9e263b..9c37d781765510 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -162,10 +162,9 @@ bool ResourceManager::check_cluster_params_valid(const ClusterPB& cluster, std:: for (auto& n : cluster.nodes()) { // check here cloud_unique_id std::string cloud_unique_id = n.cloud_unique_id(); - auto [is_degrade_format, instance_id] = - get_instance_id_by_degrade_unique_id(cloud_unique_id); - if (config::enable_check_cloud_unique_id_degrade_format && is_degrade_format && - !check_degrade_instance_valid(instance_id)) { + auto [is_degrade_format, instance_id] = get_instance_id_by_cloud_unique_id(cloud_unique_id); + if (config::enable_check_instance_id && is_degrade_format && + !is_instance_id_registered(instance_id)) { ss << "node=" << n.DebugString() << " cloud_unique_id use degrade format, but check instance failed"; *err = ss.str(); @@ -211,7 +210,7 @@ bool ResourceManager::check_cluster_params_valid(const ClusterPB& cluster, std:: return no_err; } -std::pair ResourceManager::get_instance_id_by_degrade_unique_id( +std::pair ResourceManager::get_instance_id_by_cloud_unique_id( const std::string& cloud_unique_id) { auto v = split(cloud_unique_id, ':'); if (v.size() != 3) return {false, ""}; @@ -221,16 +220,15 @@ std::pair ResourceManager::get_instance_id_by_degrade_unique_ return {true, v[1]}; } -bool ResourceManager::check_degrade_instance_valid(const std::string& instance_id) { +bool ResourceManager::is_instance_id_registered(const std::string& instance_id) { // check kv auto [c0, m0] = get_instance(nullptr, instance_id, nullptr); - { TEST_SYNC_POINT_CALLBACK("check_degrade_instance_valid", &c0); } + { TEST_SYNC_POINT_CALLBACK("is_instance_id_registered", &c0); } if (c0 != TxnErrorCode::TXN_OK) { - LOG(WARNING) << "check instance instance_id=" << instance_id - << " failed, code=" << format_as(c0) << ", info=" + m0; - return false; + LOG(WARNING) << "failed to check instance instance_id=" << instance_id + << ", code=" << format_as(c0) << ", info=" + m0; } - return true; + return c0 == TxnErrorCode::TXN_OK; } std::pair ResourceManager::add_cluster(const std::string& instance_id, diff --git a/cloud/src/resource-manager/resource_manager.h b/cloud/src/resource-manager/resource_manager.h index c45b019adc68a5..9e6f4548d244b7 100644 --- a/cloud/src/resource-manager/resource_manager.h +++ b/cloud/src/resource-manager/resource_manager.h @@ -121,7 +121,7 @@ class ResourceManager { * * @return a pair, if is_degraded_format == true , instance_id, if is_degraded_format == false, instance_id="" */ - std::pair get_instance_id_by_degrade_unique_id( + static std::pair get_instance_id_by_cloud_unique_id( const std::string& cloud_unique_id); /** @@ -131,7 +131,7 @@ class ResourceManager { * * @return true, instance_id in fdb kv */ - bool check_degrade_instance_valid(const std::string& instance_id); + bool is_instance_id_registered(const std::string& instance_id); /** * Refreshes the cache of given instance. This process removes the instance in cache diff --git a/cloud/test/meta_service_test.cpp b/cloud/test/meta_service_test.cpp index a579148295f261..75229a31b81138 100644 --- a/cloud/test/meta_service_test.cpp +++ b/cloud/test/meta_service_test.cpp @@ -56,7 +56,7 @@ int main(int argc, char** argv) { config::enable_txn_store_retry = true; config::txn_store_retry_base_intervals_ms = 1; config::txn_store_retry_times = 20; - config::enable_check_cloud_unique_id_degrade_format = false; + config::enable_check_instance_id = false; if (!doris::cloud::init_glog("meta_service_test")) { std::cerr << "failed to init glog" << std::endl; @@ -265,20 +265,20 @@ TEST(MetaServiceTest, GetInstanceIdTest) { "12345678901:ALBJLH4Q:m-n3qdpyal27rh8iprxx"); ASSERT_EQ(instance_id, ""); - config::enable_check_cloud_unique_id_degrade_format = true; + config::enable_check_instance_id = true; auto ms = get_meta_service(false); instance_id = get_instance_id(ms->resource_mgr(), "1:ALBJLH4Q-check-invalid:m-n3qdpyal27rh8iprxx"); ASSERT_EQ(instance_id, ""); - sp->set_call_back("check_degrade_instance_valid", [&](auto&& args) { + sp->set_call_back("is_instance_id_registered", [&](auto&& args) { TxnErrorCode* c0 = try_any_cast(args[0]); *c0 = TxnErrorCode::TXN_OK; }); instance_id = get_instance_id(ms->resource_mgr(), "1:ALBJLH4Q-check-invalid:m-n3qdpyal27rh8iprxx"); ASSERT_EQ(instance_id, "ALBJLH4Q-check-invalid"); - config::enable_check_cloud_unique_id_degrade_format = false; + config::enable_check_instance_id = false; sp->clear_all_call_backs(); sp->clear_trace(); From ae2fc37d04047e03b29f91f0283ea258a43874ca Mon Sep 17 00:00:00 2001 From: deardeng Date: Fri, 8 Nov 2024 19:14:52 +0800 Subject: [PATCH 5/6] add log --- cloud/src/meta-service/meta_service.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index 2f1d8bf473fdc8..de378bffa7f67e 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -103,7 +103,9 @@ std::string get_instance_id(const std::shared_ptr& rc_mgr, // check instance_id valid by get fdb if (config::enable_check_instance_id && !rc_mgr->is_instance_id_registered(id)) { - LOG(WARNING) << "use degraded format cloud_unique_id, but check instance failed"; + LOG(WARNING) << "use degraded format cloud_unique_id, but check instance failed, " + "cloud_unique_id=" + << cloud_unique_id; return ""; } return id; From ade5b0fbf1a174c8c7f0db2d92819795e748952a Mon Sep 17 00:00:00 2001 From: deardeng Date: Fri, 8 Nov 2024 19:32:59 +0800 Subject: [PATCH 6/6] fix ut --- cloud/test/fdb_injection_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cloud/test/fdb_injection_test.cpp b/cloud/test/fdb_injection_test.cpp index 125ae2b6b04170..08ba3e50e5253f 100644 --- a/cloud/test/fdb_injection_test.cpp +++ b/cloud/test/fdb_injection_test.cpp @@ -70,6 +70,7 @@ int main(int argc, char** argv) { cloud::config::txn_store_retry_base_intervals_ms = 1; cloud::config::fdb_cluster_file_path = "fdb.cluster"; cloud::config::write_schema_kv = true; + cloud::config::enable_check_instance_id = false; auto sp = SyncPoint::get_instance(); sp->enable_processing();