diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java index 4c96175b6c0..0942b27898d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java @@ -460,6 +460,16 @@ public ContainerPlacementStatus validateContainerPlacement( } int maxReplicasPerRack = getMaxReplicasPerRack(replicas, Math.min(requiredRacks, numRacks)); + + // There are scenarios where there could be excessive replicas on a rack due to nodes + // in decommission or maintenance or over-replication in general. + // In these cases, ReplicationManager shouldn't report mis-replication. + // The original intention of mis-replication was to indicate that there isn't enough rack tolerance. + // In case of over-replication, this isn't an issue. + // Mis-replication should be an issue once over-replication is fixed, not before. + // Adjust the maximum number of replicas per rack to allow containers + // with excessive replicas to not be reported as mis-replicated. + maxReplicasPerRack += Math.max(0, dns.size() - replicas); return new ContainerPlacementStatusDefault( currentRackCount.size(), requiredRacks, numRacks, maxReplicasPerRack, currentRackCount); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java index 92f05d772fe..47b36e5fe5a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; @@ -52,6 +53,7 @@ import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; @@ -60,6 +62,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -563,6 +566,50 @@ public void testvalidateContainerPlacementSingleRackCluster() { assertEquals(0, stat.misReplicationCount()); } + @ParameterizedTest + @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates") + public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) { + setup(7); + // 7 datanodes, all nodes are used. + // /rack0/node0 -> IN_SERVICE + // /rack0/node1 -> IN_SERVICE + // /rack0/node2 -> OFFLINE + // /rack0/node3 -> OFFLINE + // /rack0/node4 -> OFFLINE + // /rack1/node5 -> IN_SERVICE + // /rack1/node6 -> OFFLINE + datanodes.get(2).setPersistedOpState(state); + datanodes.get(3).setPersistedOpState(state); + datanodes.get(4).setPersistedOpState(state); + datanodes.get(6).setPersistedOpState(state); + List dns = new ArrayList<>(datanodes); + + ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(2, status.actualPlacementCount()); + assertEquals(2, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE + // /rack0/node1 -> IN_SERVICE + // /rack0/node2 -> OFFLINE > IN_SERVICE + // /rack0/node3 -> OFFLINE + // /rack0/node4 -> OFFLINE + // /rack1/node5 -> IN_SERVICE + // /rack1/node6 -> OFFLINE > IN_SERVICE + datanodes.get(2).setPersistedOpState(IN_SERVICE); + datanodes.get(6).setPersistedOpState(IN_SERVICE); + dns = new ArrayList<>(datanodes); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(2, status.actualPlacementCount()); + assertEquals(2, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + } + @ParameterizedTest @MethodSource("numDatanodes") public void testOutOfServiceNodesNotSelected(int datanodeCount) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java index 8e267f49885..b5453a5be8d 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; @@ -68,6 +69,7 @@ import static org.hamcrest.Matchers.matchesPattern; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -569,6 +571,72 @@ public void testValidateContainerPlacement(int datanodeCount) { assertEquals(0, stat.misReplicationCount()); } + @ParameterizedTest + @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates") + public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) { + setup(9, 3); + // 9 datanodes, 3 per rack. + // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 + // /rack0/node2 + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + List dns = new ArrayList<>(); + dns.add(datanodes.get(0)); + dns.add(datanodes.get(3)); + dns.add(datanodes.get(6)); + + ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 -> OFFLINE - used + // /rack0/node2 + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 -> OFFLINE - used + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + datanodes.get(1).setPersistedOpState(state); + datanodes.get(4).setPersistedOpState(state); + dns.add(datanodes.get(1)); + dns.add(datanodes.get(4)); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 -> OFFLINE - used + // /rack0/node2 -> IN_SERVICE - used + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 -> OFFLINE - used + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + dns.add(datanodes.get(2)); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + } + public List getDatanodes(List dnIndexes) { return dnIndexes.stream().map(datanodes::get).collect(Collectors.toList()); }