diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java index 97fece928b34..5fde4659829b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java @@ -57,8 +57,13 @@ public MismatchedReplicasHandler( */ @Override public boolean handle(ContainerCheckRequest request) { - ContainerInfo containerInfo = request.getContainerInfo(); - Set replicas = request.getContainerReplicas(); + if (request.isReadOnly()) { + return false; + } + + final ContainerInfo containerInfo = request.getContainerInfo(); + final Set replicas = request.getContainerReplicas(); + if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED && containerInfo.getState() != HddsProtos.LifeCycleState.QUASI_CLOSED) { // Handler is only relevant for CLOSED or QUASI-CLOSED containers. @@ -66,24 +71,14 @@ public boolean handle(ContainerCheckRequest request) { } LOG.debug("Checking container {} in MismatchedReplicasHandler", containerInfo); - - if (request.isReadOnly()) { - return false; - } // close replica if needed for (ContainerReplica replica : replicas) { - if (shouldBeClosed(containerInfo, replica)) { + ContainerReplicaProto.State replicaState = getTransitionState(containerInfo, replica); + if (replicaState != null) { LOG.debug("Sending close command for mismatched replica {} of " + "container {}.", replica, containerInfo); - - if (containerInfo.getState() == HddsProtos.LifeCycleState.CLOSED) { - replicationManager.sendCloseContainerReplicaCommand( - containerInfo, replica.getDatanodeDetails(), true); - } else if (containerInfo.getState() == - HddsProtos.LifeCycleState.QUASI_CLOSED) { - replicationManager.sendCloseContainerReplicaCommand( - containerInfo, replica.getDatanodeDetails(), false); - } + replicationManager.sendCloseContainerReplicaCommand( + containerInfo, replica.getDatanodeDetails(), ContainerReplicaProto.State.CLOSED.equals(replicaState)); } } @@ -95,23 +90,24 @@ public boolean handle(ContainerCheckRequest request) { } /** - * If a CLOSED or QUASI-CLOSED container has an OPEN or CLOSING replica, - * there is a state mismatch. QUASI_CLOSED replica of a CLOSED container - * should be closed if their sequence IDs match. + * Returns the final expected closed state type based on the scm container state and the replica state. * @param replica replica to check for mismatch and if it should be closed - * @return true if the replica should be closed, else false + * @return null if the replica should not be closed, else CLOSED/QUASI_CLOSED based on the replica's + * state. */ - private boolean shouldBeClosed(ContainerInfo container, - ContainerReplica replica) { + private ContainerReplicaProto.State getTransitionState(ContainerInfo container, + ContainerReplica replica) { if (replica.getState() == ContainerReplicaProto.State.OPEN || replica.getState() == ContainerReplicaProto.State.CLOSING) { - return true; + return HddsProtos.ReplicationType.RATIS == container.getReplicationType() ? + ContainerReplicaProto.State.QUASI_CLOSED : ContainerReplicaProto.State.CLOSED; } // a quasi closed replica of a closed container should be closed if their // sequence IDs match return container.getState() == HddsProtos.LifeCycleState.CLOSED && replica.getState() == ContainerReplicaProto.State.QUASI_CLOSED && - container.getSequenceId() == replica.getSequenceId(); + container.getSequenceId() == replica.getSequenceId() ? ContainerReplicaProto.State.CLOSED + : null; } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java index 769007546b6b..d8b660c9a3c1 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java @@ -223,12 +223,12 @@ public void testCloseCommandSentForMismatchedRatisReplicas() { assertFalse(handler.handle(readRequest)); verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( - containerInfo, mismatch1.getDatanodeDetails(), true); + containerInfo, mismatch1.getDatanodeDetails(), false); verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( - containerInfo, mismatch2.getDatanodeDetails(), true); + containerInfo, mismatch2.getDatanodeDetails(), false); // close command should not be sent for unhealthy replica verify(replicationManager, times(0)).sendCloseContainerReplicaCommand( - containerInfo, mismatch3.getDatanodeDetails(), true); + containerInfo, mismatch3.getDatanodeDetails(), false); } /** @@ -326,4 +326,59 @@ public void testQuasiClosedReplicaOfClosedContainer() { verify(replicationManager, times(0)).sendCloseContainerReplicaCommand(containerInfo, differentSeqID.getDatanodeDetails(), true); } + + @Test + public void testCloseCommandSentForMismatchedRatisReplicasWithIncorrectBCSID() { + ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo( + ratisReplicationConfig, 1, CLOSED, 1000); + ContainerReplica mismatch1 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.OPEN, 99); + ContainerReplica mismatch2 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.CLOSING, 999); + ContainerReplica mismatch3 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.QUASI_CLOSED, 1000); + ContainerReplica mismatch4 = ReplicationTestUtil.createContainerReplica( + containerInfo.containerID(), 0, + HddsProtos.NodeOperationalState.IN_SERVICE, + ContainerReplicaProto.State.CLOSING, 1000); + Set containerReplicas = new HashSet<>(); + containerReplicas.add(mismatch1); + containerReplicas.add(mismatch2); + containerReplicas.add(mismatch3); + containerReplicas.add(mismatch4); + ContainerCheckRequest request = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(new ReplicationManagerReport()) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .build(); + ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder() + .setPendingOps(Collections.emptyList()) + .setReport(new ReplicationManagerReport()) + .setContainerInfo(containerInfo) + .setContainerReplicas(containerReplicas) + .setReadOnly(true) + .build(); + + // this handler always returns false so other handlers can fix issues + // such as under replication + assertFalse(handler.handle(request)); + assertFalse(handler.handle(readRequest)); + + verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( + containerInfo, mismatch1.getDatanodeDetails(), false); + verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( + containerInfo, mismatch2.getDatanodeDetails(), false); + // close command should not be sent for unhealthy replica + verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( + containerInfo, mismatch3.getDatanodeDetails(), true); + verify(replicationManager, times(1)).sendCloseContainerReplicaCommand( + containerInfo, mismatch4.getDatanodeDetails(), false); + } }