From 5d8583d2c24368c3582d8f1439743f4fc64dd206 Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Wed, 18 Aug 2021 20:34:48 -0700 Subject: [PATCH 1/2] HBASE-25815 RSGroupBasedLoadBalancer online status never updates after being set to true for the first time --- .../hbase/rsgroup/RSGroupInfoManagerImpl.java | 24 +++++++++++++++---- .../hbase/rsgroup/EnableRSGroupsTestBase.java | 22 ++++++++++++++++- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 3ef9365456fd..94420942b848 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -32,7 +32,9 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; @@ -661,13 +663,13 @@ private synchronized void flushConfig(Map newGroupMap) thro return; } - /* For online mode, persist to hbase:rsgroup and Zookeeper */ - flushConfigTable(newGroupMap); - - // Make changes visible after having been persisted to the source of truth + // Make changes visible resetRSGroupMap(newGroupMap); saveRSGroupMapToZK(newGroupMap); updateCacheOfRSGroups(newGroupMap.keySet()); + + /* For online mode, persist to hbase:rsgroup and Zookeeper */ + flushConfigTable(newGroupMap); } private void saveRSGroupMapToZK(Map newGroupMap) throws IOException { @@ -825,6 +827,20 @@ private void createRSGroupTable() throws IOException { } public boolean isOnline() { + if (isMasterRunning(masterServices)) { + try { + // try reading from the table + CompletableFuture read = conn.getTable(RSGROUP_TABLE_NAME).get(new Get(ROW_KEY)); + if (read.get(10000, TimeUnit.MILLISECONDS) != null) { + online = true; + } + } catch (Exception e) { + LOG.warn("Failed to read from " + RSGROUP_TABLE_NAME+ "; setting online = false"); + online = false; + } + } else { + online = false; + } return online; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/EnableRSGroupsTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/EnableRSGroupsTestBase.java index 9611bafc2c9e..516975dc91ec 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/EnableRSGroupsTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/EnableRSGroupsTestBase.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.rsgroup; import static java.lang.Thread.sleep; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -25,6 +26,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.JVMClusterUtil; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -72,7 +74,25 @@ public void testEnableRSGroup() throws IOException, InterruptedException { (RSGroupBasedLoadBalancer) TEST_UTIL.getMiniHBaseCluster().getMaster().getLoadBalancer(); long start = EnvironmentEdgeManager.currentTime(); while (EnvironmentEdgeManager.currentTime() - start <= 60000 && !loadBalancer.isOnline()) { - LOG.info("waiting for rsgroup load balancer onLine..."); + LOG.info("Waiting for rsgroup load balancer online..."); + sleep(200); + } + + assertTrue(loadBalancer.isOnline()); + + // kill all RS, RSGroupBasedLoadBalancer should now be offline since rsgroup table unavailable + for (JVMClusterUtil.RegionServerThread t: + TEST_UTIL.getMiniHBaseCluster().getRegionServerThreads()) { + TEST_UTIL.getMiniHBaseCluster().killRegionServer( + t.getRegionServer().getServerName()); + } + + assertFalse(loadBalancer.isOnline()); + + TEST_UTIL.getMiniHBaseCluster().startRegionServer(); + start = EnvironmentEdgeManager.currentTime(); + while (EnvironmentEdgeManager.currentTime() - start <= 60000 && !loadBalancer.isOnline()) { + LOG.info("Waiting for rsgroup load balancer online..."); sleep(200); } From 4fa64e3e8a72321d964be82bd3ac0e2b3ceb203a Mon Sep 17 00:00:00 2001 From: Caroline Zhou Date: Fri, 20 Aug 2021 16:56:44 -0700 Subject: [PATCH 2/2] update prev map after changes flushed --- .../apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 94420942b848..67cb0b64e763 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -665,11 +665,13 @@ private synchronized void flushConfig(Map newGroupMap) thro // Make changes visible resetRSGroupMap(newGroupMap); - saveRSGroupMapToZK(newGroupMap); - updateCacheOfRSGroups(newGroupMap.keySet()); /* For online mode, persist to hbase:rsgroup and Zookeeper */ flushConfigTable(newGroupMap); + saveRSGroupMapToZK(newGroupMap); + + // Update previous map + updateCacheOfRSGroups(newGroupMap.keySet()); } private void saveRSGroupMapToZK(Map newGroupMap) throws IOException {