From 4775e7aefed93772b030453d2e3024755fada04c Mon Sep 17 00:00:00 2001 From: deardeng Date: Thu, 31 Jul 2025 11:22:11 +0800 Subject: [PATCH 1/2] [Fix](case) Fix `test_rename_compute_group` due to vcg pr pick (#54106) ### What problem does this PR solve? Fix missing logic #46221 fix rename but when developing vcg logic, missing pr #46221 functionality pick form vcg branch, so miss #46221 logic, --- .../meta-service/meta_service_resource.cpp | 23 +++++++++++++++---- .../src/resource-manager/resource_manager.cpp | 11 +++++---- .../multi_cluster/test_no_cluster_hits.groovy | 12 ++++++---- .../cloud_p0/node_mgr/test_ms_api.groovy | 8 +++---- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 4bb5a99c5ad067..0ae3f3c6c54525 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -2509,8 +2509,8 @@ void handle_notify_decommissioned(const std::string& instance_id, } void handle_rename_cluster(const std::string& instance_id, const ClusterInfo& cluster, - std::shared_ptr resource_mgr, std::string& msg, - MetaServiceCode& code) { + std::shared_ptr resource_mgr, bool replace, + std::string& msg, MetaServiceCode& code) { msg = resource_mgr->update_cluster( instance_id, cluster, [&](const ClusterPB& i) { return i.cluster_id() == cluster.cluster.cluster_id(); }, @@ -2522,6 +2522,10 @@ void handle_rename_cluster(const std::string& instance_id, const ClusterInfo& cl cluster_names.emplace(cluster_in_instance.cluster_name()); } auto it = cluster_names.find(cluster.cluster.cluster_name()); + LOG(INFO) << "cluster.cluster.cluster_name(): " << cluster.cluster.cluster_name(); + for (auto itt : cluster_names) { + LOG(INFO) << "instance's cluster name : " << itt; + } if (it != cluster_names.end()) { code = MetaServiceCode::INVALID_ARGUMENT; ss << "failed to rename cluster, a cluster with the same name already exists " @@ -2539,7 +2543,8 @@ void handle_rename_cluster(const std::string& instance_id, const ClusterInfo& cl } c.set_cluster_name(cluster.cluster.cluster_name()); return msg; - }); + }, + replace); } void handle_update_cluster_endpoint(const std::string& instance_id, const ClusterInfo& cluster, @@ -2766,9 +2771,17 @@ void MetaServiceImpl::alter_cluster(google::protobuf::RpcController* controller, case AlterClusterRequest::NOTIFY_DECOMMISSIONED: handle_notify_decommissioned(instance_id, request, resource_mgr(), msg, code); break; - case AlterClusterRequest::RENAME_CLUSTER: - handle_rename_cluster(instance_id, cluster, resource_mgr(), msg, code); + case AlterClusterRequest::RENAME_CLUSTER: { + // SQL mode, cluster cluster name eq empty cluster name, need drop empty cluster first. + // but in http api, cloud control will drop empty cluster + bool replace_if_existing_empty_target_cluster = + request->has_replace_if_existing_empty_target_cluster() + ? request->replace_if_existing_empty_target_cluster() + : false; + handle_rename_cluster(instance_id, cluster, resource_mgr(), + replace_if_existing_empty_target_cluster, msg, code); break; + } case AlterClusterRequest::UPDATE_CLUSTER_ENDPOINT: handle_update_cluster_endpoint(instance_id, cluster, resource_mgr(), msg, code); break; diff --git a/cloud/src/resource-manager/resource_manager.cpp b/cloud/src/resource-manager/resource_manager.cpp index 5c610a234b109d..e79043475d0533 100644 --- a/cloud/src/resource-manager/resource_manager.cpp +++ b/cloud/src/resource-manager/resource_manager.cpp @@ -811,10 +811,8 @@ std::string ResourceManager::update_cluster( } std::vector clusters_in_instance; - std::set cluster_names; // collect cluster in instance pb for check for (auto& i : instance.clusters()) { - cluster_names.emplace(i.cluster_name()); clusters_in_instance.emplace_back(i); } @@ -845,8 +843,11 @@ std::string ResourceManager::update_cluster( // check cluster_name is empty cluster, if empty and replace_if_existing_empty_target_cluster == true, drop it if (replace_if_existing_empty_target_cluster) { - auto it = cluster_names.find(cluster_name); - if (it != cluster_names.end()) { + auto it = std::find_if(clusters_in_instance.begin(), clusters_in_instance.end(), + [&cluster_name](const auto& cluster) { + return cluster_name == cluster.cluster_name(); + }); + if (it != clusters_in_instance.end()) { // found it, if it's an empty cluster, drop it from instance int idx = -1; for (auto& cluster : instance.clusters()) { @@ -859,7 +860,7 @@ std::string ResourceManager::update_cluster( instance.clusters()); clusters.DeleteSubrange(idx, 1); // Remove cluster name from set - cluster_names.erase(cluster_name); + clusters_in_instance.erase(it); LOG(INFO) << "remove empty cluster due to it is the target of a " "rename_cluster, cluster_name=" << cluster_name; diff --git a/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy b/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy index 193824a65408b9..82a77ecb368b39 100644 --- a/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy +++ b/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy @@ -93,13 +93,15 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') { """ } catch (Exception e) { logger.info("exception: {}", e.getMessage()) - // assertTrue(e.getMessage().contains("ComputeGroupException: COMPUTE_GROUPS_NO_ALIVE_BE")) - // assertTrue(e.getMessage().contains("are in an abnormal state")) + // in 3.0 + assertTrue(e.getMessage().contains("ComputeGroupException: COMPUTE_GROUPS_NO_ALIVE_BE")) + assertTrue(e.getMessage().contains("are in an abnormal state")) + // in master // The new optimizer code modifies the path and returns a different exception message // exception: errCode = 2, detailMessage = No backend available as scan node, please check the status of your // backends.[1747384136706: not alive, 1747384136705: not alive, 1747384136704: not alive] - assertTrue(e.getMessage().contains("No backend available as scan node")) + // assertTrue(e.getMessage().contains("No backend available as scan node")) } try { @@ -152,13 +154,13 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') { } try { - // errCode = 2, detailMessage = Can not find compute group:compute_cluster + // errCode = 2, detailMessage = The current compute group compute_cluster is not registered in the system sql """ select * from $table """ } catch (Exception e) { logger.info("exception: {}", e.getMessage()) - assertTrue(e.getMessage().contains("Can not find compute group")) + assertTrue(e.getMessage().contains("The current compute group compute_cluster is not registered in the system")) } } } diff --git a/regression-test/suites/cloud_p0/node_mgr/test_ms_api.groovy b/regression-test/suites/cloud_p0/node_mgr/test_ms_api.groovy index 9cdc56e561a85e..2df7815e769427 100644 --- a/regression-test/suites/cloud_p0/node_mgr/test_ms_api.groovy +++ b/regression-test/suites/cloud_p0/node_mgr/test_ms_api.groovy @@ -1176,7 +1176,7 @@ suite('test_ms_api', 'p0, docker') { log.info("add two observer fe failed test http cli result: ${body} ${respCode}".toString()) def json = parseJson(body) assertTrue(json.code.equalsIgnoreCase("INVALID_ARGUMENT")) - assertTrue(json.msg.contains("cluster is SQL type, but not set master and follower node, master count=0 follower count=0 so sql cluster can't get a Master node")) + assertTrue(json.msg.contains("cluster is SQL type, must have only one master node, now master count: 0")) } def ip4 = "162.0.0.4" @@ -1202,7 +1202,7 @@ suite('test_ms_api', 'p0, docker') { // failed, due to two master node // if force_change_to_multi_follower_mode == false, check type not changed, FE_MASTER log.info("add some fe failed nodes http cli result: ${body} ${respCode} ${json}".toString()) - assertTrue(json.code.equalsIgnoreCase("INTERNAL_ERROR")) + assertTrue(json.code.equalsIgnoreCase("INVALID_ARGUMENT")) assertTrue(json.msg.contains("instance invalid, cant modify, plz check")) } @@ -1228,7 +1228,7 @@ suite('test_ms_api', 'p0, docker') { respCode, body -> def json = parseJson(body) log.info("drop all fe nodes failed http cli result: ${body} ${respCode} ${json}".toString()) - assertTrue(json.code.equalsIgnoreCase("INTERNAL_ERROR")) + assertTrue(json.code.equalsIgnoreCase("INVALID_ARGUMENT")) assertTrue(json.msg.contains("instance invalid, cant modify, plz check")) } @@ -1396,7 +1396,7 @@ suite('test_ms_api', 'p0, docker') { respCode, body -> def json = parseJson(body) log.info("drop fe observer node http cli result: ${body} ${respCode} ${json}".toString()) - assertTrue(json.code.equalsIgnoreCase("INTERNAL_ERROR")) + assertTrue(json.code.equalsIgnoreCase("INVALID_ARGUMENT")) assertTrue(json.msg.contains("drop fe node not in safe time, try later")) } From 8b9ddac153a8795fab6f23749abd11924b5b2344 Mon Sep 17 00:00:00 2001 From: deardeng Date: Sun, 28 Sep 2025 16:20:31 +0800 Subject: [PATCH 2/2] fix --- .../cloud_p0/multi_cluster/test_no_cluster_hits.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy b/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy index 82a77ecb368b39..f7959267668f37 100644 --- a/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy +++ b/regression-test/suites/cloud_p0/multi_cluster/test_no_cluster_hits.groovy @@ -94,14 +94,14 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') { } catch (Exception e) { logger.info("exception: {}", e.getMessage()) // in 3.0 - assertTrue(e.getMessage().contains("ComputeGroupException: COMPUTE_GROUPS_NO_ALIVE_BE")) - assertTrue(e.getMessage().contains("are in an abnormal state")) + // assertTrue(e.getMessage().contains("ComputeGroupException: COMPUTE_GROUPS_NO_ALIVE_BE")) + // assertTrue(e.getMessage().contains("are in an abnormal state")) // in master // The new optimizer code modifies the path and returns a different exception message // exception: errCode = 2, detailMessage = No backend available as scan node, please check the status of your // backends.[1747384136706: not alive, 1747384136705: not alive, 1747384136704: not alive] - // assertTrue(e.getMessage().contains("No backend available as scan node")) + assertTrue(e.getMessage().contains("No backend available as scan node")) } try { @@ -154,13 +154,13 @@ suite('test_no_cluster_hits', 'multi_cluster, docker') { } try { - // errCode = 2, detailMessage = The current compute group compute_cluster is not registered in the system + // errCode = 2, detailMessage = Can not find compute group:compute_cluster sql """ select * from $table """ } catch (Exception e) { logger.info("exception: {}", e.getMessage()) - assertTrue(e.getMessage().contains("The current compute group compute_cluster is not registered in the system")) + assertTrue(e.getMessage().contains("Can not find compute group")) } } }