From 141843ddddd13308355b2b03c55508c6772d4946 Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Tue, 15 Oct 2024 10:26:03 +0800 Subject: [PATCH] [fix](cloud) Fix the error of Cloud forwarding SQL not finding the cluster (#41819) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes 1. Fix forwarding DDL SQL from a follower: when the cloud compute group cannot be resolved, only log a warning instead of throwing an exception ``` 2024-10-14 14:27:11,076 WARN (mysql-nio-pool-4|110) [MasterOpExecutor.buildStmtForwardParams():220] failed to get cloud compute group org.apache.doris.cloud.qe.ComputeGroupException: errCode = 2, detailMessage = default compute group smoke_test_cluster_01 check auth failed, ComputeGroupException: CURRENT_USER_NO_AUTH_TO_USE_DEFAULT_COMPUTE_GROUP, you can contact the system administrator and request that they grant you the default compute group permissions, use SQL `SHOW PROPERTY like 'default_compute_group'` and `GRANT USAGE_PRIV ON COMPUTE GROUP {compute_group_name} TO {user}` at org.apache.doris.qe.ConnectContext.getCloudCluster(ConnectContext.java:1241) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MasterOpExecutor.buildStmtForwardParams(MasterOpExecutor.java:218) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MasterOpExecutor.execute(MasterOpExecutor.java:89) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.forwardToMaster(StmtExecutor.java:1172) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.executeByLegacy(StmtExecutor.java:981) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:639) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.queryRetry(StmtExecutor.java:557) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:547) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ConnectProcessor.executeQuery(ConnectProcessor.java:397) ~[doris-fe.jar:1.2-SNAPSHOT] at 
org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:238) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.handleQuery(MysqlConnectProcessor.java:194) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.dispatch(MysqlConnectProcessor.java:222) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.processOnce(MysqlConnectProcessor.java:281) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) ~[?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) ~[?:?] at java.lang.Thread.run(Thread.java:833) ~[?:?] ``` 2. Remove the useless warning log printed by getBackendIdWithoutException during journal replay ``` 2024-10-14 10:21:08,479 WARN (replayer|14) [Replica.getBackendIdWithoutException():247] getBackendIdWithoutException: org.apache.doris.cloud.qe.ComputeGroupException: errCode = 2, detailMessage = connect context not set, ComputeGroupException: CONNECT_CONTEXT_NOT_SET, you can contact the system administrator, unless it's a daemon job in log, check your job at org.apache.doris.cloud.catalog.CloudReplica.getCurrentClusterId(CloudReplica.java:244) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.cloud.catalog.CloudReplica.getBackendId(CloudReplica.java:160) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.catalog.Replica.getBackendIdWithoutException(Replica.java:245) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.catalog.TabletInvertedIndex.addReplica(TabletInvertedIndex.java:631) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.datasource.InternalCatalog.replayAddPartition(InternalCatalog.java:1926) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.catalog.Env.replayAddPartition(Env.java:3343) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.persist.EditLog.loadJournal(EditLog.java:257) ~[doris-fe.jar:1.2-SNAPSHOT] at 
org.apache.doris.catalog.Env.replayJournal(Env.java:2962) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.catalog.Env$4.runOneCycle(Env.java:2724) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.common.util.Daemon.run(Daemon.java:116) ~[doris-fe.jar:1.2-SNAPSHOT] ``` 3. Fix the UnsupportedOperationException thrown by SHOW PROC '/dbs/1728872463112/1728872489388/partitions/1728872489387/1728872489389/1728872489392'; ``` 2024-10-14 19:51:02,673 WARN (mysql-nio-pool-0|119) [StmtExecutor.executeByLegacy():1097] execute Exception. stmt[17, 63ff6f3a58d84db5-9f1ab6642ce976de] java.lang.UnsupportedOperationException: null at java.util.AbstractList.add(AbstractList.java:153) ~[?:?] at java.util.AbstractList.add(AbstractList.java:111) ~[?:?] at org.apache.doris.common.proc.ReplicasProcNode.fetchResult(ReplicasProcNode.java:142) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.analysis.ShowProcStmt.getMetaData(ShowProcStmt.java:68) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ShowExecutor.handleShowProc(ShowExecutor.java:786) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ShowExecutor.execute(ShowExecutor.java:317) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.handleShow(StmtExecutor.java:2844) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.executeByLegacy(StmtExecutor.java:1059) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:639) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.queryRetry(StmtExecutor.java:557) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.StmtExecutor.execute(StmtExecutor.java:547) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ConnectProcessor.executeQuery(ConnectProcessor.java:397) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.ConnectProcessor.handleQuery(ConnectProcessor.java:238) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.handleQuery(MysqlConnectProcessor.java:194) ~[doris-fe.jar:1.2-SNAPSHOT] at 
org.apache.doris.qe.MysqlConnectProcessor.dispatch(MysqlConnectProcessor.java:222) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.qe.MysqlConnectProcessor.processOnce(MysqlConnectProcessor.java:281) ~[doris-fe.jar:1.2-SNAPSHOT] at org.apache.doris.mysql.ReadListener.lambda$handleEvent$0(ReadListener.java:52) ~[doris-fe.jar:1.2-SNAPSHOT] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136) ~[?:?] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635) ~[?:?] at java.lang.Thread.run(Thread.java:833) ~[?:?] ``` Issue Number: close #xxx --- .../org/apache/doris/catalog/TabletInvertedIndex.java | 8 ++++---- .../org/apache/doris/common/proc/ReplicasProcNode.java | 4 ++-- .../main/java/org/apache/doris/qe/MasterOpExecutor.java | 1 - 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index 7ea62f5a56e68c..4a421dc7b2b1ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -630,17 +630,17 @@ public void deleteTablet(long tabletId) { public void addReplica(long tabletId, Replica replica) { long stamp = writeLock(); try { - Preconditions.checkState(tabletMetaMap.containsKey(tabletId), - "tablet " + tabletId + " not exists, replica " + replica.getId() - + ", backend " + replica.getBackendIdWithoutException()); // cloud mode, create table not need backendId, represent with -1. long backendId = Config.isCloudMode() ? 
-1 : replica.getBackendIdWithoutException(); + Preconditions.checkState(tabletMetaMap.containsKey(tabletId), + "tablet " + tabletId + " not exists, replica " + replica.getId() + + ", backend " + backendId); replicaMetaTable.put(tabletId, backendId, replica); replicaToTabletMap.put(replica.getId(), tabletId); backingReplicaMetaTable.put(backendId, tabletId, replica); if (LOG.isDebugEnabled()) { LOG.debug("add replica {} of tablet {} in backend {}", - replica.getId(), tabletId, replica.getBackendIdWithoutException()); + replica.getId(), tabletId, backendId); } } finally { writeUnlock(stamp); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java index cba5432cc14c80..d7958f75504ede 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/ReplicasProcNode.java @@ -33,8 +33,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; -import java.util.Arrays; import java.util.List; /* @@ -116,7 +116,7 @@ public ProcResult fetchResult() throws AnalysisException { if (Config.enable_query_hit_stats) { queryHits = QueryStatsUtil.getMergedReplicaStats(replica.getId()); } - List replicaInfo = Arrays.asList(String.valueOf(replica.getId()), + List replicaInfo = Lists.newArrayList(String.valueOf(replica.getId()), String.valueOf(replica.getBackendIdWithoutException()), String.valueOf(replica.getVersion()), String.valueOf(replica.getLastSuccessVersion()), diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/MasterOpExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/MasterOpExecutor.java index 1df8dda4e8f196..b83838cdd07a0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/MasterOpExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/MasterOpExecutor.java @@ -218,7 
+218,6 @@ private TMasterOpRequest buildStmtForwardParams() throws AnalysisException { cluster = ctx.getCloudCluster(false); } catch (Exception e) { LOG.warn("failed to get cloud compute group", e); - throw new AnalysisException("failed to get cloud compute group", e); } if (!Strings.isNullOrEmpty(cluster)) { params.setCloudCluster(cluster);