From 230cdb6b52960b1ee5d21611e2a0694e33af4703 Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Mon, 7 Feb 2022 22:26:21 +0530 Subject: [PATCH 1/5] HDDS-6244. ContainerBalancer metrics don't show updated values in JMX --- .../container/balancer/ContainerBalancer.java | 12 +- .../balancer/ContainerBalancerMetrics.java | 108 +++++------------- .../balancer/TestContainerBalancer.java | 28 ++--- 3 files changed, 45 insertions(+), 103 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java index ea32cfadfbbb..949de9c17793 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java @@ -194,6 +194,7 @@ private void balance() { //if no new move option is generated, it means the cluster can //not be balanced any more , so just stop IterationResult iR = doIteration(); + metrics.incrementCountIterations(1); LOG.info("Result of this iteration of Container Balancer: {}", iR); if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) { stop(); @@ -313,11 +314,6 @@ private boolean initializeIteration() { } if (Double.compare(utilization, upperLimit) > 0) { overUtilizedNodes.add(datanodeUsageInfo); - metrics.incrementDatanodesNumToBalance(1); - - metrics.setMaxDatanodeUtilizedPercentage(Math.max( - metrics.getMaxDatanodeUtilizedPercentage(), - ratioToPercent(utilization))); // amount of bytes greater than upper limit in this node Long overUtilizedBytes = ratioToBytes( @@ -328,7 +324,6 @@ private boolean initializeIteration() { totalOverUtilizedBytes += overUtilizedBytes; } else if (Double.compare(utilization, lowerLimit) < 0) { underUtilizedNodes.add(datanodeUsageInfo); - metrics.incrementDatanodesNumToBalance(1); // amount of bytes lesser than lower limit in this node Long underUtilizedBytes = ratioToBytes( @@ -341,9 +336,6 @@ private boolean initializeIteration() { withinThresholdUtilizedNodes.add(datanodeUsageInfo); } } - metrics.setDataSizeToBalanceGB( - Math.max(totalOverUtilizedBytes, totalUnderUtilizedBytes) / - OzoneConsts.GB); Collections.reverse(underUtilizedNodes); unBalancedNodes = new ArrayList<>( @@ -364,6 +356,8 @@ private boolean initializeIteration() { nodeManager, replicationManager, containerManager, findSourceStrategy); sourceToTargetMap = new HashMap<>(overUtilizedNodes.size() + withinThresholdUtilizedNodes.size()); + metrics.resetDataSizeMovedGB(); + metrics.resetMovedContainersNum(); return true; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index 984787fdecc1..44db5322ecc0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -23,8 +23,7 @@ import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.metrics2.lib.MutableGaugeInt; -import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.metrics2.lib.MutableCounterLong; /** * Metrics related to Container Balancer running in SCM. @@ -37,27 +36,16 @@ public final class ContainerBalancerMetrics { private final MetricsSystem ms; - @Metric(about = "The total amount of used space in GigaBytes that needs to " + - "be balanced.") - private MutableGaugeLong dataSizeToBalanceGB; + @Metric(about = "The amount of Gigabytes that Container Balancer moved" + + " in the last iteration.") + private MutableCounterLong dataSizeMovedGB; - @Metric(about = "The amount of Giga Bytes that have been moved to achieve " + - "balance.") - private MutableGaugeLong dataSizeMovedGB; + @Metric(about = "Number of containers that Container Balancer moved" + + " in the last iteration.") + private MutableCounterLong movedContainersNum; - @Metric(about = "Number of containers that Container Balancer has moved" + - " until now.") - private MutableGaugeLong movedContainersNum; - - @Metric(about = "The total number of datanodes that need to be balanced.") - private MutableGaugeLong datanodesNumToBalance; - - @Metric(about = "Number of datanodes that Container Balancer has balanced " + - "until now.") - private MutableGaugeLong datanodesNumBalanced; - - @Metric(about = "Utilisation value of the current maximum utilised datanode.") - private MutableGaugeInt maxDatanodeUtilizedPercentage; + @Metric(about = "Number of iterations that Container Balancer has run for.") + private MutableCounterLong countIterations; /** * Create and register metrics named {@link ContainerBalancerMetrics#NAME} @@ -75,82 +63,48 @@ private ContainerBalancerMetrics(MetricsSystem ms) { this.ms = ms; } - public long getDataSizeToBalanceGB() { - return dataSizeToBalanceGB.value(); - } - - public void setDataSizeToBalanceGB(long size) { - this.dataSizeToBalanceGB.set(size); - } - + /** + * Gets the amount of data moved by Container Balancer in the last iteration. + * @return size in GB + */ public long getDataSizeMovedGB() { return dataSizeMovedGB.value(); } - public void setDataSizeMovedGB(long dataSizeMovedGB) { - this.dataSizeMovedGB.set(dataSizeMovedGB); - } - - public long incrementDataSizeMovedGB(long valueToAdd) { + public void incrementDataSizeMovedGB(long valueToAdd) { this.dataSizeMovedGB.incr(valueToAdd); - return this.dataSizeMovedGB.value(); - } - - public long getMovedContainersNum() { - return movedContainersNum.value(); } - public void setMovedContainersNum(long movedContainersNum) { - this.movedContainersNum.set(movedContainersNum); - } - - public long incrementMovedContainersNum(long valueToAdd) { - this.movedContainersNum.incr(valueToAdd); - return this.movedContainersNum.value(); - } - - public long getDatanodesNumToBalance() { - return datanodesNumToBalance.value(); - } - - public void setDatanodesNumToBalance(long datanodesNumToBalance) { - this.datanodesNumToBalance.set(datanodesNumToBalance); + public void resetDataSizeMovedGB() { + dataSizeMovedGB.incr(-getDataSizeMovedGB()); } /** - * Add specified valueToAdd to the number of datanodes that need to be - * balanced. - * - * @param valueToAdd number of datanodes to add + * Gets the number of containers moved by Container Balancer in the last + * iteration. + * @return number of containers */ - public void incrementDatanodesNumToBalance(long valueToAdd) { - this.datanodesNumToBalance.incr(valueToAdd); + public long getMovedContainersNum() { + return movedContainersNum.value(); } - public long getDatanodesNumBalanced() { - return datanodesNumBalanced.value(); + public void incrementMovedContainersNum(long valueToAdd) { + this.movedContainersNum.incr(valueToAdd); } - public void setDatanodesNumBalanced(long datanodesNumBalanced) { - this.datanodesNumBalanced.set(datanodesNumBalanced); + public void resetMovedContainersNum() { + movedContainersNum.incr(-getMovedContainersNum()); } /** - * Add specified valueToAdd to datanodesNumBalanced. - * - * @param valueToAdd The value to add. - * @return The result after addition. + * Gets the number of iterations that Container Balancer has run for. + * @return number of iterations */ - public long incrementDatanodesNumBalanced(long valueToAdd) { - datanodesNumBalanced.incr(valueToAdd); - return datanodesNumBalanced.value(); - } - - public int getMaxDatanodeUtilizedPercentage() { - return maxDatanodeUtilizedPercentage.value(); + public long getCountIterations() { + return countIterations.value(); } - public void setMaxDatanodeUtilizedPercentage(int percentage) { - this.maxDatanodeUtilizedPercentage.set(percentage); + public void incrementCountIterations(long valueToAdd) { + countIterations.incr(valueToAdd); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java index e6f32e081ac6..dd63b7743ba5 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java @@ -53,6 +53,7 @@ import org.slf4j.LoggerFactory; import org.slf4j.event.Level; +import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -500,29 +501,22 @@ public void balancerShouldObeyMaxSizeEnteringTargetLimit() { @Test public void testMetrics() { + conf.set("hdds.datanode.du.refresh.period", "1ms"); + balancerConfiguration.setBalancingInterval(Duration.ofMillis(2)); balancerConfiguration.setThreshold(10); - balancerConfiguration.setIterations(1); - balancerConfiguration.setMaxSizeEnteringTarget(10 * OzoneConsts.GB); - balancerConfiguration.setMaxSizeToMovePerIteration(100 * OzoneConsts.GB); + balancerConfiguration.setIterations(2); + balancerConfiguration.setMaxSizeEnteringTarget(6 * OzoneConsts.GB); + // deliberately set max size per iteration to a low value, 6GB + balancerConfiguration.setMaxSizeToMovePerIteration(6 * OzoneConsts.GB); balancerConfiguration.setMaxDatanodesPercentageToInvolvePerIteration(100); containerBalancer.start(balancerConfiguration); + sleepWhileBalancing(500); - // waiting for balance completed. - // TODO: this is a temporary implementation for now - // modify this after balancer is fully completed - try { - Thread.sleep(500); - } catch (InterruptedException e) {} - - containerBalancer.stop(); ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); - Assert.assertEquals(determineExpectedUnBalancedNodes( - balancerConfiguration.getThreshold()).size(), - metrics.getDatanodesNumToBalance()); - Assert.assertEquals(ContainerBalancer.ratioToPercent( - nodeUtilizations.get(nodeUtilizations.size() - 1)), - metrics.getMaxDatanodeUtilizedPercentage()); + Assert.assertTrue(metrics.getDataSizeMovedGB() <= 6); + Assert.assertEquals(2, metrics.getCountIterations()); + containerBalancer.stop(); } /** From f49ebef44840a6844f2991ea91f721174940a309 Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Tue, 8 Feb 2022 12:27:19 +0530 Subject: [PATCH 2/5] trigger new CI check From 7c8ff475f3afa7fa0d56424baae6f56c09550343 Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Sat, 12 Feb 2022 23:49:12 +0530 Subject: [PATCH 3/5] replace some metrics + more testing --- .../container/balancer/ContainerBalancer.java | 48 ++++++--- .../balancer/ContainerBalancerMetrics.java | 101 ++++++++++++++---- .../balancer/TestContainerBalancer.java | 43 ++++---- 3 files changed, 136 insertions(+), 56 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java index 582756471132..265cd383879a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java @@ -268,15 +268,9 @@ private boolean initializeIteration() { datanodeUsageInfo.getDatanodeDetails())); this.totalNodesInCluster = datanodeUsageInfos.size(); - this.clusterCapacity = 0L; - this.clusterUsed = 0L; - this.clusterRemaining = 0L; - this.selectedContainers.clear(); - this.overUtilizedNodes.clear(); - this.underUtilizedNodes.clear(); - this.unBalancedNodes.clear(); - this.countDatanodesInvolvedPerIteration = 0; - this.sizeMovedPerIteration = 0; + + // reset some variables and metrics for this iteration + resetState(); clusterAvgUtilisation = calculateAvgUtilization(datanodeUsageInfos); if (LOG.isDebugEnabled()) { @@ -314,6 +308,7 @@ private boolean initializeIteration() { } if (Double.compare(utilization, upperLimit) > 0) { overUtilizedNodes.add(datanodeUsageInfo); + metrics.incrementDatanodesNumUnbalanced(1); // amount of bytes greater than upper limit in this node Long overUtilizedBytes = ratioToBytes( @@ -324,6 +319,7 @@ private boolean initializeIteration() { totalOverUtilizedBytes += overUtilizedBytes; } else if (Double.compare(utilization, lowerLimit) < 0) { underUtilizedNodes.add(datanodeUsageInfo); + metrics.incrementDatanodesNumUnbalanced(1); // amount of bytes lesser than lower limit in this node Long underUtilizedBytes = ratioToBytes( @@ -336,6 +332,9 @@ private boolean initializeIteration() { withinThresholdUtilizedNodes.add(datanodeUsageInfo); } } + metrics.incrementDataSizeCausingUnbalanceGB( + Math.max(totalOverUtilizedBytes, totalUnderUtilizedBytes) / + OzoneConsts.GB); Collections.reverse(underUtilizedNodes); unBalancedNodes = new ArrayList<>( @@ -356,8 +355,6 @@ private boolean initializeIteration() { nodeManager, replicationManager, containerManager, findSourceStrategy); sourceToTargetMap = new HashMap<>(overUtilizedNodes.size() + withinThresholdUtilizedNodes.size()); - metrics.resetDataSizeMovedGB(); - metrics.resetMovedContainersNum(); return true; } @@ -445,7 +442,7 @@ private void checkIterationMoveResults(Set selectedTargets) { ContainerInfo container = containerManager.getContainer(moveSelection.getContainerID()); this.sizeMovedPerIteration += container.getUsedBytes(); - metrics.incrementMovedContainersNum(1); + metrics.incrementMovedContainersNumInLatestIteration(1); LOG.info("Move completed for container {} to target {}", container.containerID(), moveSelection.getTargetNode().getUuidString()); @@ -456,7 +453,8 @@ private void checkIterationMoveResults(Set selectedTargets) { } } } catch (InterruptedException e) { - LOG.warn("Container move for container {} was interrupted.", + LOG.warn("Interrupted while waiting for container move result for " + + "container {}.", moveSelection.getContainerID(), e); Thread.currentThread().interrupt(); } catch (ExecutionException e) { @@ -469,7 +467,9 @@ private void checkIterationMoveResults(Set selectedTargets) { } countDatanodesInvolvedPerIteration = sourceToTargetMap.size() + selectedTargets.size(); - metrics.incrementDataSizeMovedGB( + metrics.incrementDatanodesNumInvolvedInLatestIteration( + countDatanodesInvolvedPerIteration); + metrics.incrementDataSizeMovedGBInLatestIteration( sizeMovedPerIteration / OzoneConsts.GB); LOG.info("Number of datanodes involved in this iteration: {}. Size moved " + "in this iteration: {}B.", @@ -734,6 +734,26 @@ private void incSizeSelectedForMoving(DatanodeDetails source, findTargetStrategy.increaseSizeEntering(target, size); } + /** + * Resets some variables and metrics for this iteration. + */ + private void resetState() { + this.clusterCapacity = 0L; + this.clusterUsed = 0L; + this.clusterRemaining = 0L; + this.selectedContainers.clear(); + this.overUtilizedNodes.clear(); + this.underUtilizedNodes.clear(); + this.unBalancedNodes.clear(); + this.countDatanodesInvolvedPerIteration = 0; + this.sizeMovedPerIteration = 0; + metrics.resetDataSizeMovedGBInLatestIteration(); + metrics.resetMovedContainersNumInLatestIteration(); + metrics.resetDatanodesNumInvolvedInLatestIteration(); + metrics.resetDataSizeCausingUnbalanceGB(); + metrics.resetDatanodesNumUnbalanced(); + } + /** * Stops ContainerBalancer. */ diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index 44db5322ecc0..eb3f3991fe76 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -36,17 +36,27 @@ public final class ContainerBalancerMetrics { private final MetricsSystem ms; - @Metric(about = "The amount of Gigabytes that Container Balancer moved" + - " in the last iteration.") - private MutableCounterLong dataSizeMovedGB; + @Metric(about = "Amount of Gigabytes that Container Balancer moved" + + " in the latest iteration.") + private MutableCounterLong dataSizeMovedGBInLatestIteration; @Metric(about = "Number of containers that Container Balancer moved" + - " in the last iteration.") - private MutableCounterLong movedContainersNum; + " in the latest iteration.") + private MutableCounterLong movedContainersNumInLatestIteration; @Metric(about = "Number of iterations that Container Balancer has run for.") private MutableCounterLong countIterations; + @Metric(about = "Number of datanodes that were involved in balancing in the" + + " latest iteration.") + private MutableCounterLong datanodesNumInvolvedInLatestIteration; + + @Metric(about = "Amount of data in Gigabytes that is causing unbalance.") + private MutableCounterLong dataSizeCausingUnbalanceGB; + + @Metric(about = "Number of unbalanced datanodes.") + private MutableCounterLong datanodesNumUnbalanced; + /** * Create and register metrics named {@link ContainerBalancerMetrics#NAME} * for {@link ContainerBalancer}. @@ -64,36 +74,39 @@ private ContainerBalancerMetrics(MetricsSystem ms) { } /** - * Gets the amount of data moved by Container Balancer in the last iteration. + * Gets the amount of data moved by Container Balancer in the latest + * iteration. * @return size in GB */ - public long getDataSizeMovedGB() { - return dataSizeMovedGB.value(); + public long getDataSizeMovedGBInLatestIteration() { + return dataSizeMovedGBInLatestIteration.value(); } - public void incrementDataSizeMovedGB(long valueToAdd) { - this.dataSizeMovedGB.incr(valueToAdd); + public void incrementDataSizeMovedGBInLatestIteration(long valueToAdd) { + this.dataSizeMovedGBInLatestIteration.incr(valueToAdd); } - public void resetDataSizeMovedGB() { - dataSizeMovedGB.incr(-getDataSizeMovedGB()); + public void resetDataSizeMovedGBInLatestIteration() { + dataSizeMovedGBInLatestIteration.incr( + -getDataSizeMovedGBInLatestIteration()); } /** - * Gets the number of containers moved by Container Balancer in the last + * Gets the number of containers moved by Container Balancer in the latest * iteration. * @return number of containers */ - public long getMovedContainersNum() { - return movedContainersNum.value(); + public long getMovedContainersNumInLatestIteration() { + return movedContainersNumInLatestIteration.value(); } - public void incrementMovedContainersNum(long valueToAdd) { - this.movedContainersNum.incr(valueToAdd); + public void incrementMovedContainersNumInLatestIteration(long valueToAdd) { + this.movedContainersNumInLatestIteration.incr(valueToAdd); } - public void resetMovedContainersNum() { - movedContainersNum.incr(-getMovedContainersNum()); + public void resetMovedContainersNumInLatestIteration() { + movedContainersNumInLatestIteration.incr( + -getMovedContainersNumInLatestIteration()); } /** @@ -107,4 +120,54 @@ public long getCountIterations() { public void incrementCountIterations(long valueToAdd) { countIterations.incr(valueToAdd); } + + /** + * Gets number of datanodes that were involved in balancing in the latest + * iteration. + * @return number of datanodes + */ + public long getDatanodesNumInvolvedInLatestIteration() { + return datanodesNumInvolvedInLatestIteration.value(); + } + + public void incrementDatanodesNumInvolvedInLatestIteration(long valueToAdd) { + datanodesNumInvolvedInLatestIteration.incr(valueToAdd); + } + + public void resetDatanodesNumInvolvedInLatestIteration() { + datanodesNumInvolvedInLatestIteration.incr( + -getDatanodesNumInvolvedInLatestIteration()); + } + + /** + * Gets the amount of data in Gigabytes that is causing unbalance. + * @return size of data as a long value + */ + public long getDataSizeCausingUnbalanceGB() { + return dataSizeCausingUnbalanceGB.value(); + } + + public void incrementDataSizeCausingUnbalanceGB(long valueToAdd) { + dataSizeCausingUnbalanceGB.incr(valueToAdd); + } + + public void resetDataSizeCausingUnbalanceGB() { + dataSizeCausingUnbalanceGB.incr(-getDataSizeCausingUnbalanceGB()); + } + + /** + * Gets the number of datanodes that are unbalanced. + * @return long value + */ + public long getDatanodesNumUnbalanced() { + return datanodesNumUnbalanced.value(); + } + + public void incrementDatanodesNumUnbalanced(long valueToAdd) { + datanodesNumUnbalanced.incr(valueToAdd); + } + + public void resetDatanodesNumUnbalanced() { + datanodesNumUnbalanced.incr(-getDatanodesNumUnbalanced()); + } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java index 8a217aaa9b55..44d9b0611875 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java @@ -215,15 +215,12 @@ public void unBalancedNodesListShouldBeEmptyWhenClusterIsBalanced() { balancerConfiguration.setThreshold(99.99); containerBalancer.start(balancerConfiguration); - // waiting for balance completed. - // TODO: this is a temporary implementation for now - // modify this after balancer is fully completed - try { - Thread.sleep(100); - } catch (InterruptedException e) { } + sleepWhileBalancing(100); containerBalancer.stop(); + ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); Assert.assertEquals(0, containerBalancer.getUnBalancedNodes().size()); + Assert.assertEquals(0, metrics.getDatanodesNumUnbalanced()); } /** @@ -240,16 +237,15 @@ public void containerBalancerShouldObeyMaxDatanodesToInvolveLimit() { balancerConfiguration.setIterations(1); containerBalancer.start(balancerConfiguration); - // waiting for balance completed. - // TODO: this is a temporary implementation for now - // modify this after balancer is fully completed - try { - Thread.sleep(1000); - } catch (InterruptedException e) { } + sleepWhileBalancing(500); + int number = percent * numberOfNodes / 100; + ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); + Assert.assertFalse( + containerBalancer.getCountDatanodesInvolvedPerIteration() > number); + Assert.assertTrue(metrics.getDatanodesNumInvolvedInLatestIteration() > 0); Assert.assertFalse( - containerBalancer.getCountDatanodesInvolvedPerIteration() > - (percent * numberOfNodes / 100)); + metrics.getDatanodesNumInvolvedInLatestIteration() > number); containerBalancer.stop(); } @@ -306,16 +302,14 @@ public void containerBalancerShouldObeyMaxSizeToMoveLimit() { balancerConfiguration.setIterations(1); containerBalancer.start(balancerConfiguration); - // waiting for balance completed. - // TODO: this is a temporary implementation for now - // modify this after balancer is fully completed - try { - Thread.sleep(1000); - } catch (InterruptedException e) { } + sleepWhileBalancing(500); // balancer should not have moved more size than the limit Assert.assertFalse(containerBalancer.getSizeMovedPerIteration() > 10 * OzoneConsts.GB); + Assert.assertFalse( + containerBalancer.getMetrics().getDataSizeMovedGBInLatestIteration() > + 10); containerBalancer.stop(); } @@ -504,7 +498,7 @@ public void testMetrics() { conf.set("hdds.datanode.du.refresh.period", "1ms"); balancerConfiguration.setBalancingInterval(Duration.ofMillis(2)); balancerConfiguration.setThreshold(10); - balancerConfiguration.setIterations(2); + balancerConfiguration.setIterations(1); balancerConfiguration.setMaxSizeEnteringTarget(6 * OzoneConsts.GB); // deliberately set max size per iteration to a low value, 6GB balancerConfiguration.setMaxSizeToMovePerIteration(6 * OzoneConsts.GB); @@ -514,8 +508,11 @@ public void testMetrics() { sleepWhileBalancing(500); ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); - Assert.assertTrue(metrics.getDataSizeMovedGB() <= 6); - Assert.assertEquals(2, metrics.getCountIterations()); + Assert.assertEquals(determineExpectedUnBalancedNodes( + balancerConfiguration.getThreshold()).size(), + metrics.getDatanodesNumUnbalanced()); + Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6); + Assert.assertEquals(1, metrics.getCountIterations()); containerBalancer.stop(); } From bedf5880ea91eca1669b01062dcc12a87a8050cc Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Tue, 22 Feb 2022 15:55:46 +0530 Subject: [PATCH 4/5] change metric names and improve testing --- .../container/balancer/ContainerBalancer.java | 6 ++--- .../balancer/ContainerBalancerMetrics.java | 24 +++++++++---------- .../balancer/TestContainerBalancer.java | 10 ++++---- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java index 265cd383879a..821bc4d0c071 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java @@ -194,7 +194,7 @@ private void balance() { //if no new move option is generated, it means the cluster can //not be balanced any more , so just stop IterationResult iR = doIteration(); - metrics.incrementCountIterations(1); + metrics.incrementNumIterations(1); LOG.info("Result of this iteration of Container Balancer: {}", iR); if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) { stop(); @@ -332,7 +332,7 @@ private boolean initializeIteration() { withinThresholdUtilizedNodes.add(datanodeUsageInfo); } } - metrics.incrementDataSizeCausingUnbalanceGB( + metrics.incrementDataSizeUnbalancedGB( Math.max(totalOverUtilizedBytes, totalUnderUtilizedBytes) / OzoneConsts.GB); Collections.reverse(underUtilizedNodes); @@ -750,7 +750,7 @@ private void resetState() { metrics.resetDataSizeMovedGBInLatestIteration(); metrics.resetMovedContainersNumInLatestIteration(); metrics.resetDatanodesNumInvolvedInLatestIteration(); - metrics.resetDataSizeCausingUnbalanceGB(); + metrics.resetDataSizeUnbalancedGB(); metrics.resetDatanodesNumUnbalanced(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index eb3f3991fe76..c60f4fe7fd42 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -45,14 +45,14 @@ public final class ContainerBalancerMetrics { private MutableCounterLong movedContainersNumInLatestIteration; @Metric(about = "Number of iterations that Container Balancer has run for.") - private MutableCounterLong countIterations; + private MutableCounterLong numIterations; @Metric(about = "Number of datanodes that were involved in balancing in the" + " latest iteration.") private MutableCounterLong datanodesNumInvolvedInLatestIteration; @Metric(about = "Amount of data in Gigabytes that is causing unbalance.") - private MutableCounterLong dataSizeCausingUnbalanceGB; + private MutableCounterLong dataSizeUnbalancedGB; @Metric(about = "Number of unbalanced datanodes.") private MutableCounterLong datanodesNumUnbalanced; @@ -113,12 +113,12 @@ public void resetMovedContainersNumInLatestIteration() { * Gets the number of iterations that Container Balancer has run for. * @return number of iterations */ - public long getCountIterations() { - return countIterations.value(); + public long getNumIterations() { + return numIterations.value(); } - public void incrementCountIterations(long valueToAdd) { - countIterations.incr(valueToAdd); + public void incrementNumIterations(long valueToAdd) { + numIterations.incr(valueToAdd); } /** @@ -143,16 +143,16 @@ public void resetDatanodesNumInvolvedInLatestIteration() { * Gets the amount of data in Gigabytes that is causing unbalance. * @return size of data as a long value */ - public long getDataSizeCausingUnbalanceGB() { - return dataSizeCausingUnbalanceGB.value(); + public long getDataSizeUnbalancedGB() { + return dataSizeUnbalancedGB.value(); } - public void incrementDataSizeCausingUnbalanceGB(long valueToAdd) { - dataSizeCausingUnbalanceGB.incr(valueToAdd); + public void incrementDataSizeUnbalancedGB(long valueToAdd) { + dataSizeUnbalancedGB.incr(valueToAdd); } - public void resetDataSizeCausingUnbalanceGB() { - dataSizeCausingUnbalanceGB.incr(-getDataSizeCausingUnbalanceGB()); + public void resetDataSizeUnbalancedGB() { + dataSizeUnbalancedGB.incr(-getDataSizeUnbalancedGB()); } /** diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java index 44d9b0611875..6450a6118b27 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java @@ -307,9 +307,11 @@ public void containerBalancerShouldObeyMaxSizeToMoveLimit() { // balancer should not have moved more size than the limit Assert.assertFalse(containerBalancer.getSizeMovedPerIteration() > 10 * OzoneConsts.GB); - Assert.assertFalse( - containerBalancer.getMetrics().getDataSizeMovedGBInLatestIteration() > - 10); + + long size = + containerBalancer.getMetrics().getDataSizeMovedGBInLatestIteration(); + Assert.assertTrue(size > 0); + Assert.assertFalse(size > 10); containerBalancer.stop(); } @@ -512,7 +514,7 @@ public void testMetrics() { balancerConfiguration.getThreshold()).size(), metrics.getDatanodesNumUnbalanced()); Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6); - Assert.assertEquals(1, metrics.getCountIterations()); + Assert.assertEquals(1, metrics.getNumIterations()); containerBalancer.stop(); } From 7b4cc55aaf0ec894cef862acb39222741ad8536c Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Fri, 25 Feb 2022 16:19:03 +0530 Subject: [PATCH 5/5] use "num" as prefix for naming metrics --- .../container/balancer/ContainerBalancer.java | 14 +++--- .../balancer/ContainerBalancerMetrics.java | 46 +++++++++---------- .../balancer/TestContainerBalancer.java | 8 ++-- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java index 821bc4d0c071..c3a82f5cdca9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancer.java @@ -308,7 +308,7 @@ private boolean initializeIteration() { } if (Double.compare(utilization, upperLimit) > 0) { overUtilizedNodes.add(datanodeUsageInfo); - metrics.incrementDatanodesNumUnbalanced(1); + metrics.incrementNumDatanodesUnbalanced(1); // amount of bytes greater than upper limit in this node Long overUtilizedBytes = ratioToBytes( @@ -319,7 +319,7 @@ private boolean initializeIteration() { totalOverUtilizedBytes += overUtilizedBytes; } else if (Double.compare(utilization, lowerLimit) < 0) { underUtilizedNodes.add(datanodeUsageInfo); - metrics.incrementDatanodesNumUnbalanced(1); + metrics.incrementNumDatanodesUnbalanced(1); // amount of bytes lesser than lower limit in this node Long underUtilizedBytes = ratioToBytes( @@ -442,7 +442,7 @@ private void checkIterationMoveResults(Set selectedTargets) { ContainerInfo container = containerManager.getContainer(moveSelection.getContainerID()); this.sizeMovedPerIteration += container.getUsedBytes(); - metrics.incrementMovedContainersNumInLatestIteration(1); + metrics.incrementNumMovedContainersInLatestIteration(1); LOG.info("Move completed for container {} to target {}", container.containerID(), moveSelection.getTargetNode().getUuidString()); @@ -467,7 +467,7 @@ private void checkIterationMoveResults(Set selectedTargets) { } countDatanodesInvolvedPerIteration = sourceToTargetMap.size() + selectedTargets.size(); - metrics.incrementDatanodesNumInvolvedInLatestIteration( + metrics.incrementNumDatanodesInvolvedInLatestIteration( countDatanodesInvolvedPerIteration); metrics.incrementDataSizeMovedGBInLatestIteration( sizeMovedPerIteration / OzoneConsts.GB); @@ -748,10 +748,10 @@ private void resetState() { this.countDatanodesInvolvedPerIteration = 0; this.sizeMovedPerIteration = 0; metrics.resetDataSizeMovedGBInLatestIteration(); - metrics.resetMovedContainersNumInLatestIteration(); - metrics.resetDatanodesNumInvolvedInLatestIteration(); + metrics.resetNumMovedContainersInLatestIteration(); + metrics.resetNumDatanodesInvolvedInLatestIteration(); metrics.resetDataSizeUnbalancedGB(); - metrics.resetDatanodesNumUnbalanced(); + metrics.resetNumDatanodesUnbalanced(); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index c60f4fe7fd42..07998447ccad 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -42,20 +42,20 @@ public final class ContainerBalancerMetrics { @Metric(about = "Number of containers that Container Balancer moved" + " in the latest iteration.") - private MutableCounterLong movedContainersNumInLatestIteration; + private MutableCounterLong numMovedContainersInLatestIteration; @Metric(about = "Number of iterations that Container Balancer has run for.") private MutableCounterLong numIterations; @Metric(about = "Number of datanodes that were involved in balancing in the" + " latest iteration.") - private MutableCounterLong datanodesNumInvolvedInLatestIteration; + private MutableCounterLong numDatanodesInvolvedInLatestIteration; @Metric(about = "Amount of data in Gigabytes that is causing unbalance.") private MutableCounterLong dataSizeUnbalancedGB; @Metric(about = "Number of unbalanced datanodes.") - private MutableCounterLong datanodesNumUnbalanced; + private MutableCounterLong numDatanodesUnbalanced; /** * Create and register metrics named {@link ContainerBalancerMetrics#NAME} @@ -96,17 +96,17 @@ public void resetDataSizeMovedGBInLatestIteration() { * iteration. * @return number of containers */ - public long getMovedContainersNumInLatestIteration() { - return movedContainersNumInLatestIteration.value(); + public long getNumMovedContainersInLatestIteration() { + return numMovedContainersInLatestIteration.value(); } - public void incrementMovedContainersNumInLatestIteration(long valueToAdd) { - this.movedContainersNumInLatestIteration.incr(valueToAdd); + public void incrementNumMovedContainersInLatestIteration(long valueToAdd) { + this.numMovedContainersInLatestIteration.incr(valueToAdd); } - public void resetMovedContainersNumInLatestIteration() { - movedContainersNumInLatestIteration.incr( - -getMovedContainersNumInLatestIteration()); + public void resetNumMovedContainersInLatestIteration() { + numMovedContainersInLatestIteration.incr( + -getNumMovedContainersInLatestIteration()); } /** @@ -126,17 +126,17 @@ public void incrementNumIterations(long valueToAdd) { * iteration. * @return number of datanodes */ - public long getDatanodesNumInvolvedInLatestIteration() { - return datanodesNumInvolvedInLatestIteration.value(); + public long getNumDatanodesInvolvedInLatestIteration() { + return numDatanodesInvolvedInLatestIteration.value(); } - public void incrementDatanodesNumInvolvedInLatestIteration(long valueToAdd) { - datanodesNumInvolvedInLatestIteration.incr(valueToAdd); + public void incrementNumDatanodesInvolvedInLatestIteration(long valueToAdd) { + numDatanodesInvolvedInLatestIteration.incr(valueToAdd); } - public void resetDatanodesNumInvolvedInLatestIteration() { - datanodesNumInvolvedInLatestIteration.incr( - -getDatanodesNumInvolvedInLatestIteration()); + public void resetNumDatanodesInvolvedInLatestIteration() { + numDatanodesInvolvedInLatestIteration.incr( + -getNumDatanodesInvolvedInLatestIteration()); } /** @@ -159,15 +159,15 @@ public void resetDataSizeUnbalancedGB() { * Gets the number of datanodes that are unbalanced. * @return long value */ - public long getDatanodesNumUnbalanced() { - return datanodesNumUnbalanced.value(); + public long getNumDatanodesUnbalanced() { + return numDatanodesUnbalanced.value(); } - public void incrementDatanodesNumUnbalanced(long valueToAdd) { - datanodesNumUnbalanced.incr(valueToAdd); + public void incrementNumDatanodesUnbalanced(long valueToAdd) { + numDatanodesUnbalanced.incr(valueToAdd); } - public void resetDatanodesNumUnbalanced() { - datanodesNumUnbalanced.incr(-getDatanodesNumUnbalanced()); + public void resetNumDatanodesUnbalanced() { + numDatanodesUnbalanced.incr(-getNumDatanodesUnbalanced()); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java index 6450a6118b27..8d58af1925a0 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancer.java @@ -220,7 +220,7 @@ public void unBalancedNodesListShouldBeEmptyWhenClusterIsBalanced() { containerBalancer.stop(); ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); Assert.assertEquals(0, containerBalancer.getUnBalancedNodes().size()); - Assert.assertEquals(0, metrics.getDatanodesNumUnbalanced()); + Assert.assertEquals(0, metrics.getNumDatanodesUnbalanced()); } /** @@ -243,9 +243,9 @@ public void containerBalancerShouldObeyMaxDatanodesToInvolveLimit() { ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); Assert.assertFalse( containerBalancer.getCountDatanodesInvolvedPerIteration() > number); - Assert.assertTrue(metrics.getDatanodesNumInvolvedInLatestIteration() > 0); + Assert.assertTrue(metrics.getNumDatanodesInvolvedInLatestIteration() > 0); Assert.assertFalse( - metrics.getDatanodesNumInvolvedInLatestIteration() > number); + metrics.getNumDatanodesInvolvedInLatestIteration() > number); containerBalancer.stop(); } @@ -512,7 +512,7 @@ public void testMetrics() { ContainerBalancerMetrics metrics = containerBalancer.getMetrics(); Assert.assertEquals(determineExpectedUnBalancedNodes( balancerConfiguration.getThreshold()).size(), - metrics.getDatanodesNumUnbalanced()); + metrics.getNumDatanodesUnbalanced()); Assert.assertTrue(metrics.getDataSizeMovedGBInLatestIteration() <= 6); Assert.assertEquals(1, metrics.getNumIterations()); containerBalancer.stop();