From 81cd61d7d9135236c9e95d468995792a1f62f3f5 Mon Sep 17 00:00:00 2001 From: Siddhant Sangwan Date: Fri, 20 Jan 2023 16:11:02 +0530 Subject: [PATCH] HDDS-7813. Handle Mismatched Replicas (OPEN or CLOSING) of QUASI-CLOSED containers in RM --- .../replication/ReplicationManager.java | 4 +- ...er.java => MismatchedReplicasHandler.java} | 44 +++++++++------ ...ava => TestMismatchedReplicasHandler.java} | 56 +++++++++++++++++-- 3 files changed, 82 insertions(+), 22 deletions(-) rename hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/{ClosedWithMismatchedReplicasHandler.java => MismatchedReplicasHandler.java} (64%) rename hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/{TestClosedWithMismatchedReplicasHandler.java => TestMismatchedReplicasHandler.java} (79%) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 665f58fa73bb..535f0dde9d5a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -37,7 +37,7 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica; import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport; -import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithMismatchedReplicasHandler; +import org.apache.hadoop.hdds.scm.container.replication.health.MismatchedReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler; import org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler; import org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler; @@ -243,7 +243,7 @@ public ReplicationManager(final ConfigurationSource conf, containerCheckChain .addNext(new ClosingContainerHandler(this)) .addNext(new QuasiClosedContainerHandler(this)) - .addNext(new ClosedWithMismatchedReplicasHandler(this)) + .addNext(new MismatchedReplicasHandler(this)) .addNext(new EmptyContainerHandler(this)) .addNext(new DeletingContainerHandler(this)) .addNext(ecReplicationCheckHandler) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java similarity index 64% rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java index 4428428d17c6..87730b2398b2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java @@ -29,38 +29,43 @@ import java.util.Set; /** - * Handler to process containers which are closed, but some replicas are still - * open or closing. This handler will send a command to the datanodes for each - * mis-matched replica to close it. + * Handler to process containers which are closed or quasi-closed, but some + * replicas are still open or closing. This handler will send a command to + * the datanodes for each mis-matched replica to close it. */ -public class ClosedWithMismatchedReplicasHandler extends AbstractCheck { +public class MismatchedReplicasHandler extends AbstractCheck { public static final Logger LOG = - LoggerFactory.getLogger(ClosedWithMismatchedReplicasHandler.class); + LoggerFactory.getLogger(MismatchedReplicasHandler.class); private ReplicationManager replicationManager; - public ClosedWithMismatchedReplicasHandler( + public MismatchedReplicasHandler( ReplicationManager replicationManager) { this.replicationManager = replicationManager; } /** - * Handles CLOSED EC or RATIS container. If some replicas are CLOSING or - * OPEN, this sends a force-close command for them. + * Handles CLOSED EC or CLOSED/QUASI-CLOSED RATIS containers. If some + * replicas are CLOSING or OPEN, this tries to close them. Force close + * command is sent for CLOSED and close command is sent for QUASI-CLOSED + * containers (replicas of quasi-closed containers should move to + * quasi-closed state). + * * @param request ContainerCheckRequest object representing the container - * @return always returns true so that other handlers in the chain can fix + * @return always returns false so that other handlers in the chain can fix * issues such as under replication */ @Override public boolean handle(ContainerCheckRequest request) { ContainerInfo containerInfo = request.getContainerInfo(); Set replicas = request.getContainerReplicas(); - if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED) { - // Handler is only relevant for CLOSED containers. + if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED && + containerInfo.getState() != HddsProtos.LifeCycleState.QUASI_CLOSED) { + // Handler is only relevant for CLOSED or QUASI-CLOSED containers. return false; } - LOG.debug("Checking container {} in ClosedWithMismatchedReplicasHandler", + LOG.debug("Checking container {} in MismatchedReplicasHandler", containerInfo); // close replica if its state is OPEN or CLOSING @@ -68,8 +73,15 @@ public boolean handle(ContainerCheckRequest request) { if (isMismatched(replica)) { LOG.debug("Sending close command for mismatched replica {} of " + "container {}.", replica, containerInfo); - replicationManager.sendCloseContainerReplicaCommand( - containerInfo, replica.getDatanodeDetails(), true); + + if (containerInfo.getState() == HddsProtos.LifeCycleState.CLOSED) { + replicationManager.sendCloseContainerReplicaCommand( + containerInfo, replica.getDatanodeDetails(), true); + } else if (containerInfo.getState() == + HddsProtos.LifeCycleState.QUASI_CLOSED) { + replicationManager.sendCloseContainerReplicaCommand( + containerInfo, replica.getDatanodeDetails(), false); + } } } @@ -81,8 +93,8 @@ public boolean handle(ContainerCheckRequest request) { } /** - * If a CLOSED container has an OPEN or CLOSING replica, there is a state - * mismatch. + * If a CLOSED or QUASI-CLOSED container has an OPEN or CLOSING replica, + * there is a state mismatch. * @param replica replica to check for mismatch * @return true if the replica is in CLOSING or OPEN state, else false */ diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java similarity index 79% rename from hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java rename to hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java index f2b3a72885df..e67c1fa6c6c6 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java @@ -38,17 +38,18 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.times; /** - * Tests for the ClosedWithMismatchedReplicasHandler. + * Tests for the MismatchedReplicasHandler. */ -public class TestClosedWithMismatchedReplicasHandler { +public class TestMismatchedReplicasHandler { private ReplicationManager replicationManager; - private ClosedWithMismatchedReplicasHandler handler; + private MismatchedReplicasHandler handler; private ECReplicationConfig ecReplicationConfig; private RatisReplicationConfig ratisReplicationConfig; @@ -58,7 +59,7 @@ public void setup() { ratisReplicationConfig = RatisReplicationConfig.getInstance( HddsProtos.ReplicationFactor.THREE); replicationManager = Mockito.mock(ReplicationManager.class); - handler = new ClosedWithMismatchedReplicasHandler(replicationManager); + handler = new MismatchedReplicasHandler(replicationManager); } @Test @@ -222,4 +223,51 @@ public void testCloseCommandSentForMismatchedRatisReplicas() { .sendCloseContainerReplicaCommand( containerInfo, mismatch3.getDatanodeDetails(), true); } + + /** + * Mismatched replicas (OPEN or CLOSING) of a QUASI-CLOSED ratis container + * should be sent close (and not force close) commands. + */ + @Test + public void testCloseCommandSentForMismatchedReplicaOfQuasiClosedContainer() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + ratisReplicationConfig, 1, QUASI_CLOSED); + ContainerReplica mismatch1 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.OPEN); + ContainerReplica mismatch2 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.CLOSING); + ContainerReplica mismatch3 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.UNHEALTHY); + Set containerReplicas = new HashSet<>(); + containerReplicas.add(mismatch1); + containerReplicas.add(mismatch2); + containerReplicas.add(mismatch3); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(new ReplicationManagerReport()) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .build(); + + // this handler always returns false so other handlers can fix issues + // such as under replication + Assertions.assertFalse(handler.handle(request)); + + Mockito.verify(replicationManager, times(1)) + .sendCloseContainerReplicaCommand( + containerInfo, mismatch1.getDatanodeDetails(), false); + Mockito.verify(replicationManager, times(1)) + .sendCloseContainerReplicaCommand( + containerInfo, mismatch2.getDatanodeDetails(), false); + // close command should not be sent for unhealthy replica + Mockito.verify(replicationManager, times(0)) + .sendCloseContainerReplicaCommand( + containerInfo, mismatch3.getDatanodeDetails(), false); + } }