From 71c54753fa281f20b6cff70648788bfb64808c89 Mon Sep 17 00:00:00 2001 From: Aryan Gupta Date: Mon, 9 Jun 2025 13:28:28 +0530 Subject: [PATCH 1/5] HDDS-13142. Correct SCMPerformanceMetrics for delete operation. --- .../metrics/SCMPerformanceMetrics.java | 22 +++++++++++++++---- .../scm/server/SCMBlockProtocolServer.java | 13 ++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index b2d142dc3d46..491a07f78c63 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -55,6 +55,10 @@ public final class SCMPerformanceMetrics implements MetricsSource { private MutableRate allocateBlockSuccessLatencyNs; @Metric(about = "Latency for a failed allocateBlock call in nanoseconds") private MutableRate allocateBlockFailureLatencyNs; + @Metric(about = "Total blocks taken in each key delete cycle.") + private MutableCounterLong deleteKeyBlocksInKeyDeleteCycle; + @Metric(about = "Total blocks taken in each key delete cycle failure.") + private MutableCounterLong deleteKeyBlocksInKeyDeleteCycleFailure; public SCMPerformanceMetrics() { this.registry = new MetricsRegistry(SOURCE_NAME); @@ -84,6 +88,8 @@ public void getMetrics(MetricsCollector collector, boolean all) { deleteKeyFailureLatencyNs.snapshot(recordBuilder, true); allocateBlockSuccessLatencyNs.snapshot(recordBuilder, true); allocateBlockFailureLatencyNs.snapshot(recordBuilder, true); + deleteKeyBlocksInKeyDeleteCycle.snapshot(recordBuilder, true); + deleteKeyBlocksInKeyDeleteCycleFailure.snapshot(recordBuilder, true); } public void updateAllocateBlockSuccessLatencyNs(long startNanos) { @@ -94,14 +100,22 @@ public void updateAllocateBlockFailureLatencyNs(long startNanos) { allocateBlockFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeySuccessStats(long startNanos) { - deleteKeySuccess.incr(); + public void updateDeleteKeySuccessStats(long keys, long startNanos) { + deleteKeySuccess.incr(keys); deleteKeySuccessLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeyFailureStats(long startNanos) { - deleteKeyFailure.incr(); + public void updateDeleteKeyFailureStats(long keys, long startNanos) { + deleteKeyFailure.incr(keys); deleteKeyFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } + + public void updateDeleteKeyBlocksInKeyDeleteCycle(long keys) { + deleteKeyBlocksInKeyDeleteCycle.incr(keys); + } + + public void updateDeleteKeyFailedBlocksInKeyDeleteCycle(long keys) { + deleteKeyBlocksInKeyDeleteCycleFailure.incr(keys); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 1627342c29a1..0baeb3b21a35 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -267,7 +267,7 @@ public List allocateBlock( public List deleteKeyBlocks( List keyBlocksInfoList) throws IOException { if (LOG.isDebugEnabled()) { - LOG.debug("SCM is informed by OM to delete {} blocks", + LOG.debug("SCM is informed by OM to delete {} keys.", keyBlocksInfoList.size()); } @@ -276,14 +276,21 @@ public List deleteKeyBlocks( ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result resultCode; Exception e = null; long startNanos = Time.monotonicNowNanos(); + long totalBlocks = 0; + for (BlockGroup bg : keyBlocksInfoList) { + totalBlocks += bg.getBlockIDList().size(); + } try { scm.getScmBlockManager().deleteBlocks(keyBlocksInfoList); - perfMetrics.updateDeleteKeySuccessStats(startNanos); + perfMetrics.updateDeleteKeyBlocksInKeyDeleteCycle(totalBlocks); + perfMetrics.updateDeleteKeySuccessStats(keyBlocksInfoList.size(), startNanos); resultCode = ScmBlockLocationProtocolProtos. DeleteScmBlockResult.Result.success; + LOG.info("Total number of blocks ACK by SCM in this cycle: " + totalBlocks); } catch (IOException ioe) { e = ioe; - perfMetrics.updateDeleteKeyFailureStats(startNanos); + perfMetrics.updateDeleteKeyFailedBlocksInKeyDeleteCycle(totalBlocks); + perfMetrics.updateDeleteKeyFailureStats(keyBlocksInfoList.size(), startNanos); LOG.warn("Fail to delete {} keys", keyBlocksInfoList.size(), ioe); switch (ioe instanceof SCMException ? ((SCMException) ioe).getResult() : IO_EXCEPTION) { From 5f6afc5bc73721596f4cb239d50b19892474bfec Mon Sep 17 00:00:00 2001 From: Aryan Gupta Date: Mon, 9 Jun 2025 14:31:21 +0530 Subject: [PATCH 2/5] Addressed comments. --- .../metrics/SCMPerformanceMetrics.java | 2 +- .../scm/server/SCMBlockProtocolServer.java | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index 491a07f78c63..485a166a7a06 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -110,7 +110,7 @@ public void updateDeleteKeyFailureStats(long keys, long startNanos) { deleteKeyFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeyBlocksInKeyDeleteCycle(long keys) { + public void updateDeleteKeySuccessBlocksInKeyDeleteCycle(long keys) { deleteKeyBlocksInKeyDeleteCycle.incr(keys); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 0baeb3b21a35..b8622be8ea7a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -266,27 +266,28 @@ public List allocateBlock( @Override public List deleteKeyBlocks( List keyBlocksInfoList) throws IOException { + long totalBlocks = 0; + for (BlockGroup bg : keyBlocksInfoList) { + totalBlocks += bg.getBlockIDList().size(); + } if (LOG.isDebugEnabled()) { - LOG.debug("SCM is informed by OM to delete {} keys.", - keyBlocksInfoList.size()); + LOG.debug("SCM is informed by OM to delete {} keys. Total blocks to deleted {}.", + keyBlocksInfoList.size(), totalBlocks); } - List results = new ArrayList<>(); Map auditMap = Maps.newHashMap(); ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result resultCode; Exception e = null; long startNanos = Time.monotonicNowNanos(); - long totalBlocks = 0; - for (BlockGroup bg : keyBlocksInfoList) { - totalBlocks += bg.getBlockIDList().size(); - } try { scm.getScmBlockManager().deleteBlocks(keyBlocksInfoList); - perfMetrics.updateDeleteKeyBlocksInKeyDeleteCycle(totalBlocks); + perfMetrics.updateDeleteKeySuccessBlocksInKeyDeleteCycle(totalBlocks); perfMetrics.updateDeleteKeySuccessStats(keyBlocksInfoList.size(), startNanos); resultCode = ScmBlockLocationProtocolProtos. DeleteScmBlockResult.Result.success; - LOG.info("Total number of blocks ACK by SCM in this cycle: " + totalBlocks); + if (LOG.isDebugEnabled()) { + LOG.debug("Total number of blocks ACK by SCM in this cycle: " + totalBlocks); + } } catch (IOException ioe) { e = ioe; perfMetrics.updateDeleteKeyFailedBlocksInKeyDeleteCycle(totalBlocks); From 71555ac417821cab3d97563034bf69eb22103e2d Mon Sep 17 00:00:00 2001 From: Aryan Gupta Date: Mon, 9 Jun 2025 14:39:34 +0530 Subject: [PATCH 3/5] Addressed comments. --- .../metrics/SCMPerformanceMetrics.java | 18 +++++++++--------- .../scm/server/SCMBlockProtocolServer.java | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index 485a166a7a06..e87fb831d4f5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -43,7 +43,7 @@ public final class SCMPerformanceMetrics implements MetricsSource { private MetricsRegistry registry; private static SCMPerformanceMetrics instance; - @Metric(about = "Number of failed deleteKey operations") + @Metric(about = "Number of failed deleteKeys") private MutableCounterLong deleteKeyFailure; @Metric(about = "Number of successful deleteKey operations") private MutableCounterLong deleteKeySuccess; @@ -56,9 +56,9 @@ public final class SCMPerformanceMetrics implements MetricsSource { @Metric(about = "Latency for a failed allocateBlock call in nanoseconds") private MutableRate allocateBlockFailureLatencyNs; @Metric(about = "Total blocks taken in each key delete cycle.") - private MutableCounterLong deleteKeyBlocksInKeyDeleteCycle; + private MutableCounterLong deleteKeyBlocksSuccess; @Metric(about = "Total blocks taken in each key delete cycle failure.") - private MutableCounterLong deleteKeyBlocksInKeyDeleteCycleFailure; + private MutableCounterLong deleteKeyBlocksFailure; public SCMPerformanceMetrics() { this.registry = new MetricsRegistry(SOURCE_NAME); @@ -88,8 +88,8 @@ public void getMetrics(MetricsCollector collector, boolean all) { deleteKeyFailureLatencyNs.snapshot(recordBuilder, true); allocateBlockSuccessLatencyNs.snapshot(recordBuilder, true); allocateBlockFailureLatencyNs.snapshot(recordBuilder, true); - deleteKeyBlocksInKeyDeleteCycle.snapshot(recordBuilder, true); - deleteKeyBlocksInKeyDeleteCycleFailure.snapshot(recordBuilder, true); + deleteKeyBlocksSuccess.snapshot(recordBuilder, true); + deleteKeyBlocksFailure.snapshot(recordBuilder, true); } public void updateAllocateBlockSuccessLatencyNs(long startNanos) { @@ -110,12 +110,12 @@ public void updateDeleteKeyFailureStats(long keys, long startNanos) { deleteKeyFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeySuccessBlocksInKeyDeleteCycle(long keys) { - deleteKeyBlocksInKeyDeleteCycle.incr(keys); + public void updateDeleteKeySuccessBlocks(long keys) { + deleteKeyBlocksSuccess.incr(keys); } - public void updateDeleteKeyFailedBlocksInKeyDeleteCycle(long keys) { - deleteKeyBlocksInKeyDeleteCycleFailure.incr(keys); + public void updateDeleteKeyFailedBlocks(long keys) { + deleteKeyBlocksFailure.incr(keys); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index b8622be8ea7a..962a1a1b91ab 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -281,7 +281,7 @@ public List deleteKeyBlocks( long startNanos = Time.monotonicNowNanos(); try { scm.getScmBlockManager().deleteBlocks(keyBlocksInfoList); - perfMetrics.updateDeleteKeySuccessBlocksInKeyDeleteCycle(totalBlocks); + perfMetrics.updateDeleteKeySuccessBlocks(totalBlocks); perfMetrics.updateDeleteKeySuccessStats(keyBlocksInfoList.size(), startNanos); resultCode = ScmBlockLocationProtocolProtos. DeleteScmBlockResult.Result.success; @@ -290,7 +290,7 @@ public List deleteKeyBlocks( } } catch (IOException ioe) { e = ioe; - perfMetrics.updateDeleteKeyFailedBlocksInKeyDeleteCycle(totalBlocks); + perfMetrics.updateDeleteKeyFailedBlocks(totalBlocks); perfMetrics.updateDeleteKeyFailureStats(keyBlocksInfoList.size(), startNanos); LOG.warn("Fail to delete {} keys", keyBlocksInfoList.size(), ioe); switch (ioe instanceof SCMException ? ((SCMException) ioe).getResult() : From 55957377386d6e18448bc9f2107e4fccf53ea369 Mon Sep 17 00:00:00 2001 From: Aryan Gupta Date: Mon, 9 Jun 2025 14:47:04 +0530 Subject: [PATCH 4/5] Addressed comment. --- .../scm/container/placement/metrics/SCMPerformanceMetrics.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index e87fb831d4f5..a01effa3a20b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -45,7 +45,7 @@ public final class SCMPerformanceMetrics implements MetricsSource { @Metric(about = "Number of failed deleteKeys") private MutableCounterLong deleteKeyFailure; - @Metric(about = "Number of successful deleteKey operations") + @Metric(about = "Number of success deleteKeys") private MutableCounterLong deleteKeySuccess; @Metric(about = "Latency for deleteKey failure in nanoseconds") private MutableRate deleteKeyFailureLatencyNs; From 706d54bda31a8aa93efd1608f0e7efe3ad87fe43 Mon Sep 17 00:00:00 2001 From: Aryan Gupta Date: Wed, 11 Jun 2025 03:15:40 +0530 Subject: [PATCH 5/5] Fixed blockSent. --- .../hdds/scm/block/ScmBlockDeletingServiceMetrics.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java index 0e735b1557d2..cbfdddda7ca9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java @@ -270,7 +270,7 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) { .addGauge(DatanodeCommandDetails.COMMANDS_TIMEOUT_BY_DN, e.getValue().getCommandsTimeout()) .addGauge(DatanodeCommandDetails.BLOCKS_SENT_TO_DN_COMMAND, - e.getValue().getCommandsTimeout()); + e.getValue().getBlocksSent()); } recordBuilder.endRecord(); } @@ -347,6 +347,10 @@ public long getCommandsTimeout() { return commandsTimeout; } + public long getBlocksSent() { + return blocksSent; + } + @Override public String toString() { return "Sent=" + commandsSent + ", Success=" + commandsSuccess + ", Failed=" + commandsFailure +