From e3d089f45872bf42d50c811f18322bb8cc92ecea Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Sat, 3 Aug 2024 18:01:48 +0800 Subject: [PATCH 1/4] HDDS-11272. Statistics some node status information --- .../hadoop/hdds/scm/node/DatanodeInfo.java | 9 ++++ .../hdds/scm/node/NodeStateManager.java | 43 ++++++++++++++++--- .../hadoop/hdds/scm/node/SCMNodeManager.java | 30 +++++++++++++ .../resources/webapps/scm/scm-overview.html | 24 +++++++++++ .../src/main/resources/webapps/scm/scm.js | 35 +++++++++++---- 5 files changed, 126 insertions(+), 15 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java index ab296fc52bf8..53a189df6e8d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java @@ -227,6 +227,15 @@ public int getHealthyVolumeCount() { } } + public int getFailedVolumeCount() { + try { + lock.readLock().lock(); + return failedVolumeCount; + } finally { + lock.readLock().unlock(); + } + } + /** * Returns count of healthy metadata volumes reported from datanode. 
* @return count of healthy metdata log volumes diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 3307a292dca2..35628f79e8d4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -19,12 +19,7 @@ package org.apache.hadoop.hdds.scm.node; import java.io.Closeable; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; +import java.util.*; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; @@ -477,6 +472,22 @@ public List getDeadNodes() { return getNodes(null, NodeState.DEAD); } + public List getDecommissioningNodes() { + return getNodes(NodeOperationalState.DECOMMISSIONING, null); + } + + public int getDecommissioningNodeCount() { + return getDecommissioningNodes().size(); + } + + public List getEnteringMaintenanceNodes() { + return getNodes(NodeOperationalState.ENTERING_MAINTENANCE, null); + } + + public int getEnteringMaintenanceNodeCount() { + return getEnteringMaintenanceNodes().size(); + } + /** * Returns all the nodes with the specified status. 
* @@ -501,6 +512,26 @@ public List getNodes( return nodeStateMap.getDatanodeInfos(opState, health); } + public List getVolumeFailuresNodes() { + List allNodes = nodeStateMap.getAllDatanodeInfos(); + if (allNodes.size() < 1) { + return allNodes; + } + + List failedVolumeNodes = new ArrayList<>(); + for (DatanodeInfo dn : allNodes) { + if (dn.getFailedVolumeCount() > 0) { + failedVolumeNodes.add(dn); + } + } + + return failedVolumeNodes; + } + + public int getVolumeFailuresNodeCount() { + return getVolumeFailuresNodes().size(); + } + /** * Returns all the nodes which have registered to NodeStateManager. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 038f76b52e9a..595784029733 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -1223,6 +1223,8 @@ public Map getNodeStatistics() { Map nodeStatistics = new HashMap<>(); // Statistics node usaged nodeUsageStatistics(nodeStatistics); + // Statistics node states + nodeStateStatistics(nodeStatistics); // todo: Statistics of other instances return nodeStatistics; } @@ -1265,6 +1267,19 @@ private void nodeUsageStatistics(Map nodeStatics) { nodeStatics.put(UsageStatics.STDEV.getLabel(), decimalFormat.format(dev)); } + private void nodeStateStatistics(Map nodeStatics) { + int healthyNodeCount = nodeStateManager.getHealthyNodeCount(); + int deadNodeCount = nodeStateManager.getDeadNodeCount(); + int decommissioningNodeCount = nodeStateManager.getDecommissioningNodeCount(); + int enteringMaintenanceNodeCount = nodeStateManager.getEnteringMaintenanceNodeCount(); + int volumeFailuresNodeCount = nodeStateManager.getVolumeFailuresNodeCount(); + nodeStatics.put(StateStatics.HEALTHY.getLabel(), String.valueOf(healthyNodeCount)); + 
nodeStatics.put(StateStatics.DEAD.getLabel(), String.valueOf(deadNodeCount)); + nodeStatics.put(StateStatics.DECOMMISSIONING.getLabel(), String.valueOf(decommissioningNodeCount)); + nodeStatics.put(StateStatics.ENTERING_MAINTENANCE.getLabel(), String.valueOf(enteringMaintenanceNodeCount)); + nodeStatics.put(StateStatics.VOLUME_FAILURES.getLabel(), String.valueOf(volumeFailuresNodeCount)); + } + /** * Based on the current time and the last heartbeat, calculate the time difference * and get a string of the relative value. E.g. "2s ago", "1m 2s ago", etc. @@ -1346,6 +1361,21 @@ public String getLabel() { } } + private enum StateStatics { + HEALTHY("Healthy"), + DEAD("Dead"), + DECOMMISSIONING("Decommissioning"), + ENTERING_MAINTENANCE("EnteringMaintenance"), + VOLUME_FAILURES("VolumeFailures"); + private String label; + public String getLabel() { + return label; + } + StateStatics(String label) { + this.label = label; + } + } + /** * Returns the min of no healthy volumes reported out of the set * of datanodes constituting the pipeline. diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html index 67655b539f0e..cc445df75779 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html @@ -51,6 +51,30 @@

Statistics

Standard Deviation {{statistics.nodes.usages.stdev}} + + Datanode State + Count + + + Healthy Nodes + {{statistics.nodes.state.healthy}} + + + Dead Nodes + {{statistics.nodes.state.dead}} + + + Decommissioning Nodes + {{statistics.nodes.state.decommissioning}} + + + Entering Maintenance Nodes + {{statistics.nodes.state.enteringmaintenance}} + + + Volume Failures Nodes + {{statistics.nodes.state.volumefailures}} + diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js index 8ca9fb257c9e..41dc25cb650f 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm.js @@ -39,6 +39,13 @@ max : "N/A", median : "N/A", stdev : "N/A" + }, + state : { + healthy : "N/A", + dead : "N/A", + decommissioning : "N/A", + enteringmaintenance : "N/A", + volumefailures : "N/A" } } } @@ -92,15 +99,25 @@ $scope.lastIndex = Math.ceil(nodeStatusCopy.length / $scope.RecordsToDisplay); $scope.nodeStatus = nodeStatusCopy.slice(0, $scope.RecordsToDisplay); - ctrl.nodemanagermetrics.NodeStatistics.forEach(function(obj) { - if(obj.key == "Min") { - $scope.statistics.nodes.usages.min = obj.value; - } else if(obj.key == "Max") { - $scope.statistics.nodes.usages.max = obj.value; - } else if(obj.key == "Median") { - $scope.statistics.nodes.usages.median = obj.value; - } else if(obj.key == "Stdev") { - $scope.statistics.nodes.usages.stdev = obj.value; + ctrl.nodemanagermetrics.NodeStatistics.forEach(({key, value}) => { + if(key == "Min") { + $scope.statistics.nodes.usages.min = value; + } else if(key == "Max") { + $scope.statistics.nodes.usages.max = value; + } else if(key == "Median") { + $scope.statistics.nodes.usages.median = value; + } else if(key == "Stdev") { + $scope.statistics.nodes.usages.stdev = value; + } else if(key == "Healthy") { + $scope.statistics.nodes.state.healthy = value; + } else if(key == "Dead") { + 
$scope.statistics.nodes.state.dead = value; + } else if(key == "Decommissioning") { + $scope.statistics.nodes.state.decommissioning = value; + } else if(key == "EnteringMaintenance") { + $scope.statistics.nodes.state.enteringmaintenance = value; + } else if(key == "VolumeFailures") { + $scope.statistics.nodes.state.volumefailures = value; } }); }); From 58ebd1b538a1c212abeb3b9eb3d0b09828611afe Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Sat, 3 Aug 2024 18:09:29 +0800 Subject: [PATCH 2/4] Fix some checkstyle --- .../org/apache/hadoop/hdds/scm/node/NodeStateManager.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 35628f79e8d4..9cbe4de4c9e1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -19,7 +19,13 @@ package org.apache.hadoop.hdds.scm.node; import java.io.Closeable; -import java.util.*; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; +import java.util.Set; +import java.util.UUID; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; From 62ed692482c7558497adcb6de68c9d44863645de Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Mon, 5 Aug 2024 23:27:17 +0800 Subject: [PATCH 3/4] Fix some javadoc --- .../hadoop/hdds/scm/node/DatanodeInfo.java | 4 +++ .../hdds/scm/node/NodeStateManager.java | 35 ++++++++++++++----- .../hadoop/hdds/scm/node/SCMNodeManager.java | 14 ++++---- .../resources/webapps/scm/scm-overview.html | 8 ++++- 4 files changed, 45 insertions(+), 16 deletions(-) diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java index 53a189df6e8d..05ed833edbe9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java @@ -227,6 +227,10 @@ public int getHealthyVolumeCount() { } } + /** + * Returns count of failed volumes reported by the data node. + * @return count of failed volumes + */ public int getFailedVolumeCount() { try { lock.readLock().lock(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index 9cbe4de4c9e1..b96997fb1e1b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -22,7 +22,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.ArrayList; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -30,6 +29,7 @@ import java.util.concurrent.ScheduledFuture; import java.util.concurrent.TimeUnit; import java.util.function.Predicate; +import java.util.stream.Collectors; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocol.DatanodeDetails; @@ -478,18 +478,34 @@ public List getDeadNodes() { return getNodes(null, NodeState.DEAD); } + /** + * Returns all nodes that are in the decommissioning state. + * @return list of decommissioning nodes + */ public List getDecommissioningNodes() { return getNodes(NodeOperationalState.DECOMMISSIONING, null); } + /** + * Returns the count of decommissioning nodes. 
+ * @return decommissioning node count + */ public int getDecommissioningNodeCount() { return getDecommissioningNodes().size(); } + /** + * Returns all nodes that are in the entering maintenance state. + * @return list of entering maintenance nodes + */ public List getEnteringMaintenanceNodes() { return getNodes(NodeOperationalState.ENTERING_MAINTENANCE, null); } + /** + * Returns the count of entering maintenance nodes. + * @return entering maintenance node count + */ public int getEnteringMaintenanceNodeCount() { return getEnteringMaintenanceNodes().size(); } @@ -518,22 +534,25 @@ public List getNodes( return nodeStateMap.getDatanodeInfos(opState, health); } + /** + * Returns all nodes that contain failed volumes. + * @return list of nodes containing failed volumes + */ public List getVolumeFailuresNodes() { List allNodes = nodeStateMap.getAllDatanodeInfos(); if (allNodes.size() < 1) { return allNodes; } - List failedVolumeNodes = new ArrayList<>(); - for (DatanodeInfo dn : allNodes) { - if (dn.getFailedVolumeCount() > 0) { - failedVolumeNodes.add(dn); - } - } - + List failedVolumeNodes = allNodes.stream(). + filter(dn -> dn.getFailedVolumeCount() > 0).collect(Collectors.toList()); return failedVolumeNodes; } + /** + * Returns the count of nodes that contain failed volumes. 
+ * @return failed volume node count + */ public int getVolumeFailuresNodeCount() { return getVolumeFailuresNodes().size(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java index 595784029733..3339b27f2cec 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/SCMNodeManager.java @@ -1273,11 +1273,11 @@ private void nodeStateStatistics(Map nodeStatics) { int decommissioningNodeCount = nodeStateManager.getDecommissioningNodeCount(); int enteringMaintenanceNodeCount = nodeStateManager.getEnteringMaintenanceNodeCount(); int volumeFailuresNodeCount = nodeStateManager.getVolumeFailuresNodeCount(); - nodeStatics.put(StateStatics.HEALTHY.getLabel(), String.valueOf(healthyNodeCount)); - nodeStatics.put(StateStatics.DEAD.getLabel(), String.valueOf(deadNodeCount)); - nodeStatics.put(StateStatics.DECOMMISSIONING.getLabel(), String.valueOf(decommissioningNodeCount)); - nodeStatics.put(StateStatics.ENTERING_MAINTENANCE.getLabel(), String.valueOf(enteringMaintenanceNodeCount)); - nodeStatics.put(StateStatics.VOLUME_FAILURES.getLabel(), String.valueOf(volumeFailuresNodeCount)); + nodeStatics.put(StateStatistics.HEALTHY.getLabel(), String.valueOf(healthyNodeCount)); + nodeStatics.put(StateStatistics.DEAD.getLabel(), String.valueOf(deadNodeCount)); + nodeStatics.put(StateStatistics.DECOMMISSIONING.getLabel(), String.valueOf(decommissioningNodeCount)); + nodeStatics.put(StateStatistics.ENTERING_MAINTENANCE.getLabel(), String.valueOf(enteringMaintenanceNodeCount)); + nodeStatics.put(StateStatistics.VOLUME_FAILURES.getLabel(), String.valueOf(volumeFailuresNodeCount)); } /** @@ -1361,7 +1361,7 @@ public String getLabel() { } } - private enum StateStatics { + private enum StateStatistics { HEALTHY("Healthy"), DEAD("Dead"), 
DECOMMISSIONING("Decommissioning"), @@ -1371,7 +1371,7 @@ private enum StateStatics { public String getLabel() { return label; } - StateStatics(String label) { + StateStatistics(String label) { this.label = label; } } diff --git a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html index cc445df75779..5a4f2ff633c8 100644 --- a/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html +++ b/hadoop-hdds/server-scm/src/main/resources/webapps/scm/scm-overview.html @@ -28,7 +28,7 @@

SCM Information

-

Statistics

+

Usage Statistics

@@ -51,6 +51,12 @@

Statistics

+ +
Standard Deviation {{statistics.nodes.usages.stdev}}
+ +

State Statistics

+ + From 5c4d446c5a23d241187905481ea1f98b0b1a3068 Mon Sep 17 00:00:00 2001 From: jianghuazhu <740087514@qq.com> Date: Tue, 6 Aug 2024 10:53:08 +0800 Subject: [PATCH 4/4] Update NodeStateManager --- .../org/apache/hadoop/hdds/scm/node/NodeStateManager.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index b96997fb1e1b..3c3ff8fb8338 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -540,10 +540,6 @@ public List getNodes( */ public List getVolumeFailuresNodes() { List allNodes = nodeStateMap.getAllDatanodeInfos(); - if (allNodes.size() < 1) { - return allNodes; - } - List failedVolumeNodes = allNodes.stream(). filter(dn -> dn.getFailedVolumeCount() > 0).collect(Collectors.toList()); return failedVolumeNodes;
Datanode State Count