diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java index 3c72c9f77b7868..962d0980a3b3af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableBalancer.java @@ -546,6 +546,11 @@ private boolean balance(GroupId groupId, List allAvailBackendIds, Colocate // sort backends with replica num List> backendWithReplicaNum = getSortedBackendReplicaNumPairs(allAvailBackendIds, flatBackendsPerBucketSeq); + // if there is only one available backend, end the outer loop + if (backendWithReplicaNum.size() == 1) { + LOG.info("there is only one available backend, end the outer loop in colocate group {}", groupId); + break; + } int i = 0; int j = backendWithReplicaNum.size() - 1; @@ -600,11 +605,18 @@ private boolean balance(GroupId groupId, List allAvailBackendIds, Colocate } if (!isThisRoundChanged) { - // select another load backend and try again LOG.info("unable to replace backend {} with backend {} in colocate group {}", srcBeId, destBeId, groupId); - j--; - continue; + if (--j == i) { + // if all backends are checked but this round is not changed, + // we should end the outer loop to avoid endless loops + LOG.info("all backends are checked but this round is not changed, " + + "end outer loop in colocate group {}", groupId); + break OUT; + } else { + // select another load backend and try again + continue; + } } break; diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableBalancerTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableBalancerTest.java index a94377bdc24605..bb7612616a9836 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableBalancerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/ColocateTableBalancerTest.java @@ -151,4 +151,37 @@ public void testBalance() { 
Assert.assertFalse(changed); Assert.assertTrue(balancedBackendsPerBucketSeq.isEmpty()); }
+
+    /** Regression test: balance() must terminate and report no change in both scenarios below. */
+    @Test
+    public void testFixBalanceEndlessLoop() {
+        GroupId groupId = new GroupId(10000, 10001);
+        List<Column> distributionCols = Lists.newArrayList();
+        distributionCols.add(new Column("k1", PrimitiveType.INT));
+        ColocateGroupSchema groupSchema = new ColocateGroupSchema(groupId, distributionCols, 5, (short) 1);
+        Map<GroupId, ColocateGroupSchema> group2Schema = Maps.newHashMap();
+        group2Schema.put(groupId, groupSchema);
+
+        // 1. only one available backend, so there is no second backend to pair it with
+        // [[7], [7], [7], [7], [7]]
+        ColocateTableIndex colocateTableIndex = createColocateIndex(groupId, Lists.newArrayList(7L, 7L, 7L, 7L, 7L));
+        Deencapsulation.setField(colocateTableIndex, "group2Schema", group2Schema);
+        List<List<Long>> balancedBackendsPerBucketSeq = Lists.newArrayList();
+        List<Long> allAvailBackendIds = Lists.newArrayList(7L);
+        boolean changed = Deencapsulation.invoke(balancer, "balance", groupId, allAvailBackendIds,
+                colocateTableIndex, infoService, balancedBackendsPerBucketSeq);
+        Assert.assertFalse(changed);
+
+        // 2. all backends are checked but this round is not changed
+        // [[7], [7], [7], [7], [7]], with new backends 8 and 9 added on the same host as 7
+        // NOTE(review): the shared host presumably comes from the infoService fixture - confirm
+        colocateTableIndex = createColocateIndex(groupId, Lists.newArrayList(7L, 7L, 7L, 7L, 7L));
+        Deencapsulation.setField(colocateTableIndex, "group2Schema", group2Schema);
+
+        balancedBackendsPerBucketSeq = Lists.newArrayList();
+        allAvailBackendIds = Lists.newArrayList(7L, 8L, 9L);
+        changed = Deencapsulation.invoke(balancer, "balance", groupId, allAvailBackendIds,
+                colocateTableIndex, infoService, balancedBackendsPerBucketSeq);
+        Assert.assertFalse(changed);
+    }
 }