diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java index 0e735b1557d2..cbfdddda7ca9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/ScmBlockDeletingServiceMetrics.java @@ -270,7 +270,7 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) { .addGauge(DatanodeCommandDetails.COMMANDS_TIMEOUT_BY_DN, e.getValue().getCommandsTimeout()) .addGauge(DatanodeCommandDetails.BLOCKS_SENT_TO_DN_COMMAND, - e.getValue().getCommandsTimeout()); + e.getValue().getBlocksSent()); } recordBuilder.endRecord(); } @@ -347,6 +347,10 @@ public long getCommandsTimeout() { return commandsTimeout; } + public long getBlocksSent() { + return blocksSent; + } + @Override public String toString() { return "Sent=" + commandsSent + ", Success=" + commandsSuccess + ", Failed=" + commandsFailure + diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java index b2d142dc3d46..a01effa3a20b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/placement/metrics/SCMPerformanceMetrics.java @@ -43,9 +43,9 @@ public final class SCMPerformanceMetrics implements MetricsSource { private MetricsRegistry registry; private static SCMPerformanceMetrics instance; - @Metric(about = "Number of failed deleteKey operations") + @Metric(about = "Number of failed deleteKeys") private MutableCounterLong deleteKeyFailure; - @Metric(about = 
"Number of successful deleteKey operations") + @Metric(about = "Number of successful deleteKeys") private MutableCounterLong deleteKeySuccess; @Metric(about = "Latency for deleteKey failure in nanoseconds") private MutableRate deleteKeyFailureLatencyNs; @@ -55,6 +55,10 @@ public final class SCMPerformanceMetrics implements MetricsSource { private MutableRate allocateBlockSuccessLatencyNs; @Metric(about = "Latency for a failed allocateBlock call in nanoseconds") private MutableRate allocateBlockFailureLatencyNs; + @Metric(about = "Total number of blocks in successful deleteKeys requests.") + private MutableCounterLong deleteKeyBlocksSuccess; + @Metric(about = "Total number of blocks in failed deleteKeys requests.") + private MutableCounterLong deleteKeyBlocksFailure; public SCMPerformanceMetrics() { this.registry = new MetricsRegistry(SOURCE_NAME); @@ -84,6 +88,8 @@ public void getMetrics(MetricsCollector collector, boolean all) { deleteKeyFailureLatencyNs.snapshot(recordBuilder, true); allocateBlockSuccessLatencyNs.snapshot(recordBuilder, true); allocateBlockFailureLatencyNs.snapshot(recordBuilder, true); + deleteKeyBlocksSuccess.snapshot(recordBuilder, true); + deleteKeyBlocksFailure.snapshot(recordBuilder, true); } public void updateAllocateBlockSuccessLatencyNs(long startNanos) { @@ -94,14 +100,22 @@ public void updateAllocateBlockFailureLatencyNs(long startNanos) { allocateBlockFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeySuccessStats(long startNanos) { - deleteKeySuccess.incr(); + public void updateDeleteKeySuccessStats(long keys, long startNanos) { + deleteKeySuccess.incr(keys); deleteKeySuccessLatencyNs.add(Time.monotonicNowNanos() - startNanos); } - public void updateDeleteKeyFailureStats(long startNanos) { - deleteKeyFailure.incr(); + public void updateDeleteKeyFailureStats(long keys, long startNanos) { + deleteKeyFailure.incr(keys); deleteKeyFailureLatencyNs.add(Time.monotonicNowNanos() - startNanos); } + + public void 
updateDeleteKeySuccessBlocks(long blocks) { + deleteKeyBlocksSuccess.incr(blocks); + } + + public void updateDeleteKeyFailedBlocks(long blocks) { + deleteKeyBlocksFailure.incr(blocks); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java index 1627342c29a1..962a1a1b91ab 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMBlockProtocolServer.java @@ -266,11 +266,14 @@ public List allocateBlock( @Override public List deleteKeyBlocks( List keyBlocksInfoList) throws IOException { + long totalBlocks = 0; + for (BlockGroup bg : keyBlocksInfoList) { + totalBlocks += bg.getBlockIDList().size(); + } if (LOG.isDebugEnabled()) { - LOG.debug("SCM is informed by OM to delete {} blocks", - keyBlocksInfoList.size()); + LOG.debug("SCM is informed by OM to delete {} keys. Total blocks to delete: {}.", + keyBlocksInfoList.size(), totalBlocks); } - List results = new ArrayList<>(); Map auditMap = Maps.newHashMap(); ScmBlockLocationProtocolProtos.DeleteScmBlockResult.Result resultCode; @@ -278,12 +281,17 @@ public List deleteKeyBlocks( long startNanos = Time.monotonicNowNanos(); try { scm.getScmBlockManager().deleteBlocks(keyBlocksInfoList); - perfMetrics.updateDeleteKeySuccessStats(startNanos); + perfMetrics.updateDeleteKeySuccessBlocks(totalBlocks); + perfMetrics.updateDeleteKeySuccessStats(keyBlocksInfoList.size(), startNanos); resultCode = ScmBlockLocationProtocolProtos. 
DeleteScmBlockResult.Result.success; + if (LOG.isDebugEnabled()) { + LOG.debug("Total number of blocks ACK by SCM in this cycle: {}", totalBlocks); + } } catch (IOException ioe) { e = ioe; - perfMetrics.updateDeleteKeyFailureStats(startNanos); + perfMetrics.updateDeleteKeyFailedBlocks(totalBlocks); + perfMetrics.updateDeleteKeyFailureStats(keyBlocksInfoList.size(), startNanos); LOG.warn("Fail to delete {} keys", keyBlocksInfoList.size(), ioe); switch (ioe instanceof SCMException ? ((SCMException) ioe).getResult() : IO_EXCEPTION) {