diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java index 982325d500f7..d3bef7d9e2e1 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.scm.PlacementPolicy; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.slf4j.Logger; @@ -52,8 +53,12 @@ public class RatisOverReplicationHandler public static final Logger LOG = LoggerFactory.getLogger(RatisOverReplicationHandler.class); - public RatisOverReplicationHandler(PlacementPolicy placementPolicy) { + private final NodeManager nodeManager; + + public RatisOverReplicationHandler(PlacementPolicy placementPolicy, + NodeManager nodeManager) { super(placementPolicy); + this.nodeManager = nodeManager; } /** @@ -79,6 +84,20 @@ public Map> processAndCreateCommands( ContainerInfo containerInfo = result.getContainerInfo(); LOG.debug("Handling container {}.", containerInfo); + // We are going to check for over replication, so we should filter out any + // replicas that are not in a HEALTHY state. This is because a replica can + // be healthy, stale or dead. If it is dead is will be quickly removed from + // scm. If it is state, there is a good chance the DN is offline and the + // replica will go away soon. So, if we have a container that is over + // replicated with a HEALTHY and STALE replica, and we decide to delete the + // HEALTHY one, and then the STALE ones goes away, we will lose them both. + // To avoid this, we will filter out any non-healthy replicas first. + Set healthyReplicas = replicas.stream() + .filter(r -> ReplicationManager.getNodeStatus( + r.getDatanodeDetails(), nodeManager).isHealthy() + ) + .collect(Collectors.toSet()); + // count pending adds and deletes int pendingAdd = 0, pendingDelete = 0; for (ContainerReplicaOp op : pendingOps) { @@ -89,8 +108,9 @@ public Map> processAndCreateCommands( } } RatisContainerReplicaCount replicaCount = - new RatisContainerReplicaCount(containerInfo, replicas, pendingAdd, - pendingDelete, containerInfo.getReplicationFactor().getNumber(), + new RatisContainerReplicaCount(containerInfo, healthyReplicas, + pendingAdd, pendingDelete, + containerInfo.getReplicationFactor().getNumber(), minHealthyForMaintenance); // verify that this container is actually over replicated diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 6f93e81ead0d..665f58fa73bb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -229,7 +229,7 @@ public ReplicationManager(final ConfigurationSource conf, ratisUnderReplicationHandler = new RatisUnderReplicationHandler( ratisContainerPlacement, conf, nodeManager); ratisOverReplicationHandler = - new RatisOverReplicationHandler(ratisContainerPlacement); + new RatisOverReplicationHandler(ratisContainerPlacement, nodeManager); underReplicatedProcessor = new UnderReplicatedProcessor(this, rmConf.getUnderReplicatedInterval()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java index 46e844d9a70e..0bf0f1d7464f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java @@ -28,6 +28,8 @@ import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ozone.test.GenericTestUtils; @@ -56,6 +58,7 @@ public class TestRatisOverReplicationHandler { private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = RatisReplicationConfig.getInstance(HddsProtos.ReplicationFactor.THREE); private PlacementPolicy policy; + private NodeManager nodeManager; @Before public void setup() throws NodeNotFoundException { @@ -67,6 +70,10 @@ public void setup() throws NodeNotFoundException { Mockito.anyList(), Mockito.anyInt())) .thenReturn(new ContainerPlacementStatusDefault(2, 2, 3)); + nodeManager = Mockito.mock(NodeManager.class); + Mockito.when(nodeManager.getNodeStatus(Mockito.any())) + .thenReturn(NodeStatus.inServiceHealthy()); + GenericTestUtils.setLogLevel(RatisOverReplicationHandler.LOG, Level.DEBUG); } @@ -88,6 +95,23 @@ public void testOverReplicatedClosedContainer() throws IOException { 1); } + /** + * Container has 4 replicas and 1 stale so none should be deleted. + */ + @Test + public void testOverReplicatedClosedContainerWithStale() throws IOException, + NodeNotFoundException { + Set replicas = createReplicas(container.containerID(), + ContainerReplicaProto.State.CLOSED, 0, 0, 0, 0); + + ContainerReplica stale = replicas.stream().findFirst().get(); + Mockito.when(nodeManager.getNodeStatus(stale.getDatanodeDetails())) + .thenReturn(NodeStatus.inServiceStale()); + + testProcessing(replicas, Collections.emptyList(), + getOverReplicatedHealthResult(), 0); + } + /** * The container is quasi closed. All 4 replicas are quasi closed and * originate from the same datanode. This container is over replicated. @@ -261,7 +285,7 @@ private Map> testProcessing( ContainerHealthResult healthResult, int expectNumCommands) throws IOException { RatisOverReplicationHandler handler = - new RatisOverReplicationHandler(policy); + new RatisOverReplicationHandler(policy, nodeManager); Map> commands = handler.processAndCreateCommands(replicas, pendingOps,