From 8aa8c2d9516cd5c9918080d5986f92d3f26dbeca Mon Sep 17 00:00:00 2001 From: Clara Xiong Date: Wed, 17 Mar 2021 11:39:56 -0700 Subject: [PATCH 1/2] HBASE-25739 TableSkewCostFunction need to use aggregated deviation --- .../master/balancer/BalancerClusterState.java | 73 +++++++++++++------ .../hbase/master/balancer/CostFunction.java | 2 +- .../master/balancer/DoubleArrayCost.java | 50 ++++++++----- .../balancer/TableSkewCostFunction.java | 13 +--- .../master/balancer/TestBaseLoadBalancer.java | 4 +- 5 files changed, 86 insertions(+), 56 deletions(-) diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java index e90064ebaf2d..5260b11692cc 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java @@ -81,7 +81,11 @@ class BalancerClusterState { int[] initialRegionIndexToServerIndex; // regionIndex -> serverIndex (initial cluster state) int[] regionIndexToTableIndex; // regionIndex -> tableIndex int[][] numRegionsPerServerPerTable; // serverIndex -> tableIndex -> # regions - int[] numMaxRegionsPerTable; // tableIndex -> max number of regions in a single RS + int[] numRegionsPerTable; // tableIndex -> region count + double[] meanRegionsPerTable; // mean region count per table + double regionSkewByTable; // skew on RS per by table + double minRegionSkewByTable; // min skew on RS per by table + double maxRegionSkewByTable; // max skew on RS per by table int[] regionIndexToPrimaryIndex; // regionIndex -> regionIndex of the primary boolean hasRegionReplicas = false; // whether there is regions with replicas @@ -291,6 +295,7 @@ public String getRack(ServerName server) { numTables = tables.size(); numRegionsPerServerPerTable = new int[numServers][numTables]; + numRegionsPerTable = new int[numTables]; for (int i = 0; i < numServers; i++) { for (int j = 0; j < numTables; j++) { @@ -298,21 +303,29 @@ public String getRack(ServerName server) { } } + for (int i = 0; i < numTables; i++) { + numRegionsPerTable[i] = 0; + } + for (int i = 0; i < regionIndexToServerIndex.length; i++) { if (regionIndexToServerIndex[i] >= 0) { numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++; + numRegionsPerTable[regionIndexToTableIndex[i]]++; } } - numMaxRegionsPerTable = new int[numTables]; - for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) { - for (tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) { - if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) { - numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex]; - } - } + // Avoid repeated computation for planning + meanRegionsPerTable = new double[numTables]; + maxRegionSkewByTable = 0; + minRegionSkewByTable = 0; + for (int i = 0; i < numTables; i++) { + meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers; + minRegionSkewByTable += DoubleArrayCost.getMinSkew(numRegionsPerTable[i], numServers); + maxRegionSkewByTable += DoubleArrayCost.getMaxSkew(numRegionsPerTable[i], numServers); } + computeRegionSkewPerTable(); + for (int i = 0; i < regions.length; i++) { RegionInfo info = regions[i]; if (RegionReplicaUtil.isDefaultReplica(info)) { @@ -671,22 +684,21 @@ void regionMoved(int region, int oldServer, int newServer) { int tableIndex = regionIndexToTableIndex[region]; if (oldServer >= 0) { numRegionsPerServerPerTable[oldServer][tableIndex]--; + // update regionSkewPerTable for the move from old server + regionSkewByTable += + Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex] + - meanRegionsPerTable[tableIndex]) + - Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex] + 1 + - meanRegionsPerTable[tableIndex]); } numRegionsPerServerPerTable[newServer][tableIndex]++; - // check whether this caused maxRegionsPerTable in the new Server to be updated - if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) { - numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[newServer][tableIndex]; - } else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] - + 1) == numMaxRegionsPerTable[tableIndex]) { - // recompute maxRegionsPerTable since the previous value was coming from the old server - numMaxRegionsPerTable[tableIndex] = 0; - for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) { - if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) { - numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex]; - } - } - } + // update regionSkewPerTable for the move to new server + regionSkewByTable += + Math.abs(numRegionsPerServerPerTable[newServer][tableIndex] + - meanRegionsPerTable[tableIndex]) + - Math.abs(numRegionsPerServerPerTable[newServer][tableIndex] - 1 + - meanRegionsPerTable[tableIndex]); // update for servers int primary = regionIndexToPrimaryIndex[region]; @@ -856,10 +868,25 @@ public String toString() { .append(Arrays.toString(serverIndicesSortedByRegionCount)).append(", regionsPerServer=") .append(Arrays.deepToString(regionsPerServer)); - desc.append(", numMaxRegionsPerTable=").append(Arrays.toString(numMaxRegionsPerTable)) + desc.append(", regionSkewByTable=").append(regionSkewByTable) .append(", numRegions=").append(numRegions).append(", numServers=").append(numServers) .append(", numTables=").append(numTables).append(", numMovedRegions=").append(numMovedRegions) .append('}'); return desc.toString(); } -} \ No newline at end of file + + /** + * Recompute the region skew during init or plan of moves. + */ + private void computeRegionSkewPerTable() { + // reinitialize for recomputation + regionSkewByTable = 0; + + for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) { + for (int tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) { + regionSkewByTable += Math.abs(aNumRegionsPerServerPerTable[tableIndex] + - meanRegionsPerTable[tableIndex]); + } + } + } +} diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java index ecbf40093da0..0b132640644f 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java @@ -98,4 +98,4 @@ protected static double scale(double min, double max, double value) { return Math.max(0d, Math.min(1d, (value - min) / (max - min))); } -} \ No newline at end of file +} diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java index cf20a33d7aeb..9b1f5d736851 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java @@ -63,31 +63,13 @@ private static double computeCost(double[] stats) { double count = stats.length; double mean = total / count; - // Compute max as if all region servers had 0 and one had the sum of all costs. This must be - // a zero sum cost for this to make sense. - double max = ((count - 1) * mean) + (total - mean); - - // It's possible that there aren't enough regions to go around - double min; - if (count > total) { - min = ((count - total) * mean) + ((1 - mean) * total); - } else { - // Some will have 1 more than everything else. - int numHigh = (int) (total - (Math.floor(mean) * count)); - int numLow = (int) (count - numHigh); - - min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean))); - - } - min = Math.max(0, min); for (int i = 0; i < stats.length; i++) { double n = stats[i]; double diff = Math.abs(mean - n); totalCost += diff; } - - double scaled = CostFunction.scale(min, max, totalCost); - return scaled; + return CostFunction.scale(getMinSkew(total, count), + getMaxSkew(total, count), totalCost); } private static double getSum(double[] stats) { @@ -97,4 +79,32 @@ private static double getSum(double[] stats) { } return total; } + + /** + * Return the min skew of distribution + */ + public static double getMinSkew(double total, double numServers) { + double mean = total / numServers; + // It's possible that there aren't enough regions to go around + double min; + if (numServers > total) { + min = ((numServers - total) * mean + (1 - mean) * total) ; + } else { + // Some will have 1 more than everything else. + int numHigh = (int) (total - (Math.floor(mean) * numServers)); + int numLow = (int) (numServers - numHigh); + min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean)); + } + return min; + } + + /** + * Return the max deviation of distribution + * Compute max as if all region servers had 0 and one had the sum of all costs. This must be + * a zero sum cost for this to make sense. + */ + public static double getMaxSkew(double total, double numServers) { + double mean = total / numServers; + return (total - mean) + (numServers - 1) * mean; + } } diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableSkewCostFunction.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableSkewCostFunction.java index 4ab0228ba206..ea53b7a19ae2 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableSkewCostFunction.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableSkewCostFunction.java @@ -37,14 +37,7 @@ class TableSkewCostFunction extends CostFunction { @Override protected double cost() { - double max = cluster.numRegions; - double min = ((double) cluster.numRegions) / cluster.numServers; - double value = 0; - - for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) { - value += cluster.numMaxRegionsPerTable[i]; - } - - return scale(min, max, value); + return scale(cluster.minRegionSkewByTable, + cluster.maxRegionSkewByTable, cluster.regionSkewByTable); } -} \ No newline at end of file +} diff --git a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java index e515eadc26b6..f54d8af4c2da 100644 --- a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java +++ b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java @@ -355,8 +355,8 @@ public void testRegionAvailabilityWithRegionMoves() throws Exception { // now move region1 from servers[0] to servers[2] cluster.doAction(new MoveRegionAction(0, 0, 2)); - // check that the numMaxRegionsPerTable for "table" has increased to 2 - assertEquals(2, cluster.numMaxRegionsPerTable[0]); + // check that the regionSkewByTable for "table" has increased to 2 + assertEquals(2, cluster.regionSkewByTable, 0.01); // now repeat check whether moving region1 from servers[1] to servers[2] // would lower availability assertTrue(cluster.wouldLowerAvailability(hri1, servers[2])); From d6ebcd006bbd7586c385945899914b6beaba0cfc Mon Sep 17 00:00:00 2001 From: Clara Xiong Date: Wed, 9 Jun 2021 13:17:13 -0700 Subject: [PATCH 2/2] increase the upperbound of execution to avoid flaky test --- .../balancer/TestStochasticLoadBalancerBalanceCluster.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java index b7a201916f80..2aefd09f706c 100644 --- a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java +++ b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java @@ -51,8 +51,10 @@ public class TestStochasticLoadBalancerBalanceCluster extends StochasticBalancer */ @Test public void testBalanceCluster() throws Exception { - conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 2000000L); - conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 90 * 1000); // 90 sec + // Set the limits generously to avoid flaky test results. SB is a random walk algorithm and + // therefore might take longer to find the solution at one iteration when we repeat + conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L); + conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 300 * 1000); // 300 sec conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f); loadBalancer.onConfigurationChange(conf); for (int[] mockCluster : clusterStateMocks) {