diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index fdf300f913dd0..9917fd4c5b376 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -4584,8 +4584,16 @@ void processExtraRedundancyBlocksOnInService(
    */
   boolean isNodeHealthyForDecommissionOrMaintenance(DatanodeDescriptor node) {
     if (!node.checkBlockReportReceived()) {
-      LOG.info("Node {} hasn't sent its first block report.", node);
-      return false;
+      if (node.getCapacity() == 0 && node.getNumBlocks() == 0) {
+        // DataNode has a storage problem and doesn't send block reports.
+        // In this case, it is safe to decommission or put in maintenance.
+        LOG.info("The capacity and the number of blocks of {} are zero. "
+            + "Safe to decommission or put in maintenance.", node);
+        return true;
+      } else {
+        LOG.info("Node {} hasn't sent its first block report.", node);
+        return false;
+      }
     }
 
     if (node.isAlive()) {
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 18209a4d179e6..6fb7ce25a0c76 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -1587,4 +1587,50 @@ public Boolean get() {
 
     cleanupFile(fileSys, file);
   }
+
+  /**
+   * DataNodes with capacity 0 should be decommissioned immediately
+   * even if they haven't reported the first block report.
+   */
+  @Test(timeout=60000)
+  public void testCapacityZeroNodesDecommission() throws Exception {
+    int numNamenodes = 1;
+    int numDatanodes = 3;
+    startCluster(numNamenodes, numDatanodes);
+
+    // start 1 more datanode with capacity 0
+    int numOfNewDatanodes = 1;
+    int storagesPerDatanode = 2;
+    long[][] capacities = new long[numOfNewDatanodes][storagesPerDatanode];
+    for (int i = 0; i < numOfNewDatanodes; i++) {
+      for (int j = 0; j < storagesPerDatanode; j++) {
+        capacities[i][j] = 0;
+      }
+    }
+    getCluster().startDataNodes(getConf(), 1, null, true, null, null, null,
+        capacities, null, false, false, false, null);
+    getCluster().triggerHeartbeats();
+
+    // clear the block report count of the datanode with capacity 0
+    BlockManager bm = getCluster().getNamesystem().getBlockManager();
+    DatanodeManager dm = bm.getDatanodeManager();
+    DataNode dataNode = getCluster().getDataNodes().get(numDatanodes);
+    DatanodeID dnID = dataNode.getDatanodeId();
+    DatanodeDescriptor capacityZeroNode = dm.getDatanode(dnID);
+    capacityZeroNode.updateRegInfo(dnID);
+    // disable heartbeat so that the first block report is not sent
+    DataNodeTestUtils.setHeartbeatsDisabledForTests(dataNode, true);
+
+    // decommission the datanode with capacity 0
+    ArrayList<String> nodes = new ArrayList<>();
+    nodes.add(capacityZeroNode.getXferAddr());
+    initExcludeHosts(nodes);
+    refreshNodes(0);
+    waitNodeState(capacityZeroNode, AdminStates.DECOMMISSIONED);
+
+    // it should be decommissioned immediately
+    FSNamesystem ns = getCluster().getNamesystem(0);
+    int liveDecommissioned = ns.getNumDecomLiveDataNodes();
+    assertEquals(1, liveDecommissioned);
+  }
 }