From 992578c7214a6f638d53d0f3562a1222d2adca16 Mon Sep 17 00:00:00 2001 From: morningman Date: Tue, 27 Oct 2020 15:18:59 +0800 Subject: [PATCH] [TabletScheduler] Fix some bug where decommission operations cannot be completed 1. When we decommission some BEs with SSD disks, if there are no SSD disks on the remaining BEs, it will be impossible to select a suitable destination path. In this case, we need to ignore the storage medium property and try to select the destination path again. Set `isSupplement` to true will ignore the storage medium property. 2. When the BE nodes where all replicas of a tablet are located are decommission, and this task is a VERSION_INCOMPLETE task. This will lead to failure to select a suitable dest replica. At this time, we should try to convert this task to a REPLICA_MISSING task, and schedule it again. --- .../apache/doris/clone/TabletScheduler.java | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index d425a489dd713c..37205870d529c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -632,7 +632,25 @@ private void handleReplicaVersionIncomplete(TabletSchedCtx tabletCtx, AgentBatch throw new SchedException(Status.UNRECOVERABLE, "cluster does not exist"); } - tabletCtx.chooseDestReplicaForVersionIncomplete(backendsWorkingSlots); + try { + tabletCtx.chooseDestReplicaForVersionIncomplete(backendsWorkingSlots); + } catch (SchedException e) { + if (e.getMessage().equals("unable to choose dest replica")) { + // This situation may occur when the BE nodes where all replicas of a tablet are located are decommission, + // and this task is a VERSION_INCOMPLETE task. + // This will lead to failure to select a suitable dest replica. + // At this time, we try to convert this task to a REPLICA_MISSING task, and schedule it again. + LOG.debug("failed to find version incomplete replica for VERSION_INCOMPLETE task. tablet id: {}, " + + "try to find a new backend", tabletCtx.getTabletId()); + tabletCtx.releaseResource(this, true); + tabletCtx.setTabletStatus(TabletStatus.REPLICA_MISSING); + handleReplicaMissing(tabletCtx, batchTask); + LOG.debug("succeed to find new backend for VERSION_INCOMPLETE task. tablet id: {}", tabletCtx.getTabletId()); + return; + } else { + throw e; + } + } tabletCtx.chooseSrcReplicaForVersionIncomplete(backendsWorkingSlots); // create clone task @@ -660,18 +678,19 @@ private void handleReplicaRelocating(TabletSchedCtx tabletCtx, AgentBatchTask ba stat.counterReplicaUnavailableErr.incrementAndGet(); try { handleReplicaVersionIncomplete(tabletCtx, batchTask); - LOG.info("succeed to find version incomplete replica from tablet relocating. tablet id: {}", + LOG.debug("succeed to find version incomplete replica from tablet relocating. tablet id: {}", tabletCtx.getTabletId()); } catch (SchedException e) { if (e.getStatus() == Status.SCHEDULE_FAILED) { - LOG.info("failed to find version incomplete replica from tablet relocating. tablet id: {}, " + LOG.debug("failed to find version incomplete replica from tablet relocating. tablet id: {}, " + "try to find a new backend", tabletCtx.getTabletId()); // the dest or src slot may be taken after calling handleReplicaVersionIncomplete(), // so we need to release these slots first. // and reserve the tablet in TabletSchedCtx so that it can continue to be scheduled. tabletCtx.releaseResource(this, true); + tabletCtx.setTabletStatus(TabletStatus.REPLICA_MISSING); handleReplicaMissing(tabletCtx, batchTask); - LOG.info("succeed to find new backend for tablet relocating. tablet id: {}", tabletCtx.getTabletId()); + LOG.debug("succeed to find new backend for tablet relocating. tablet id: {}", tabletCtx.getTabletId()); } else { throw e; } @@ -1057,8 +1076,17 @@ private RootPathLoadStatistic chooseAvailableDestPath(TabletSchedCtx tabletCtx, resultPaths, tabletCtx.getTabletStatus() != TabletStatus.REPLICA_RELOCATING /* if REPLICA_RELOCATING, then it is not a supplement task */); if (!st.ok()) { - LOG.debug("unable to find path for supplementing tablet: {}. {}", tabletCtx, st); - continue; + // This is to solve, when we decommission some BEs with SSD disks, + // if there are no SSD disks on the remaining BEs, it will be impossible to select a + // suitable destination path. + // In this case, we need to ignore the storage medium property and try to select the destination path again. + // Set `isSupplement` to true will ignore the storage medium property. + st = bes.isFit(tabletCtx.getTabletSize(), tabletCtx.getStorageMedium(), + resultPaths, true); + if (!st.ok()) { + LOG.debug("unable to find path for supplementing tablet: {}. {}", tabletCtx, st); + continue; + } } Preconditions.checkState(resultPaths.size() == 1);