diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java
new file mode 100644
index 000000000000..3ff23b495e8c
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/QuasiClosedStuckOverReplicationHandler.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+
+/**
+ * Class to correct over replicated QuasiClosed Stuck Ratis containers.
+ */
+public class QuasiClosedStuckOverReplicationHandler implements UnhealthyReplicationHandler {
+
+  private static final org.slf4j.Logger LOG =
+      org.slf4j.LoggerFactory.getLogger(QuasiClosedStuckOverReplicationHandler.class);
+  private final ReplicationManager replicationManager;
+  private final ReplicationManagerMetrics metrics;
+
+  public QuasiClosedStuckOverReplicationHandler(final ReplicationManager replicationManager) {
+    this.replicationManager = replicationManager;
+    this.metrics = replicationManager.getMetrics();
+  }
+
+  @Override
+  public int processAndSendCommands(Set<ContainerReplica> replicas, List<ContainerReplicaOp> pendingOps,
+      ContainerHealthResult result, int remainingMaintenanceRedundancy)
+      throws IOException {
+
+    ContainerInfo containerInfo = result.getContainerInfo();
+    LOG.debug("Handling over replicated QuasiClosed Stuck Ratis container {}", containerInfo);
+
+    int pendingDelete = 0;
+    for (ContainerReplicaOp op : pendingOps) {
+      if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
+        pendingDelete++;
+      }
+    }
+
+    if (pendingDelete > 0) {
+      LOG.debug("Container {} has pending delete operations. No more over replication will be scheduled until they " +
+          "complete", containerInfo);
+      return 0;
+    }
+
+    // Filter out any STALE replicas, as they may go dead soon. If that happens, we don't want to have removed other
+    // healthy replicas instead of them, as that could result in under replication.
+    Set<ContainerReplica> healthyReplicas = replicas.stream()
+        .filter(replica -> {
+          try {
+            return replicationManager.getNodeStatus(
+                replica.getDatanodeDetails()).getHealth() == HddsProtos.NodeState.HEALTHY;
+          } catch (NodeNotFoundException e) {
+            return false;
+          }
+        })
+        .collect(Collectors.toSet());
+
+    QuasiClosedStuckReplicaCount replicaCount =
+        new QuasiClosedStuckReplicaCount(healthyReplicas, remainingMaintenanceRedundancy);
+
+    List<QuasiClosedStuckReplicaCount.MisReplicatedOrigin> misReplicatedOrigins
+        = replicaCount.getOverReplicatedOrigins();
+
+    if (misReplicatedOrigins.isEmpty()) {
+      LOG.debug("Container {} is not over replicated", containerInfo);
+      return 0;
+    }
+
+    int totalCommandsSent = 0;
+    IOException firstException = null;
+    for (QuasiClosedStuckReplicaCount.MisReplicatedOrigin origin : misReplicatedOrigins) {
+      List<ContainerReplica> sortedReplicas = getSortedReplicas(origin.getSources());
+      for (int i = 0; i < origin.getReplicaDelta(); i++) {
+        try {
+          replicationManager.sendThrottledDeleteCommand(
+              containerInfo, 0, sortedReplicas.get(i).getDatanodeDetails(), true);
+          totalCommandsSent++;
+        } catch (CommandTargetOverloadedException e) {
+          LOG.debug("Unable to send delete command for container {} to {} as it has too many pending delete commands",
+              containerInfo, sortedReplicas.get(i).getDatanodeDetails());
+          firstException = e;
+        }
+      }
+    }
+
+    if (firstException != null) {
+      // Some nodes were overloaded when attempting to send commands.
+      if (totalCommandsSent > 0) {
+        metrics.incrPartialReplicationTotal();
+      }
+      throw firstException;
+    }
+    return totalCommandsSent;
+  }
+
+  private List<ContainerReplica> getSortedReplicas(
+      Set<ContainerReplica> replicas) {
+    // sort replicas so that they can be selected in a deterministic way
+    return replicas.stream()
+        .sorted(Comparator.comparingLong(ContainerReplica::hashCode))
+        .collect(Collectors.toList());
+  }
+}
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index edaaf7ef2627..09245b2bee7a 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -184,6 +184,7 @@ public class ReplicationManager implements SCMService, ContainerReplicaPendingOp
   private final RatisOverReplicationHandler ratisOverReplicationHandler;
   private final RatisMisReplicationHandler ratisMisReplicationHandler;
   private final QuasiClosedStuckUnderReplicationHandler quasiClosedStuckUnderReplicationHandler;
+  private final QuasiClosedStuckOverReplicationHandler quasiClosedStuckOverReplicationHandler;
   private Thread underReplicatedProcessorThread;
   private Thread overReplicatedProcessorThread;
   private final UnderReplicatedProcessor underReplicatedProcessor;
@@ -252,6 +253,7 @@ public ReplicationManager(final ConfigurationSource conf,
         ratisContainerPlacement, conf, this);
     quasiClosedStuckUnderReplicationHandler =
         new QuasiClosedStuckUnderReplicationHandler(ratisContainerPlacement, conf, this);
+    quasiClosedStuckOverReplicationHandler = new QuasiClosedStuckOverReplicationHandler(this);
     underReplicatedProcessor =
         new UnderReplicatedProcessor(this, rmConf::getUnderReplicatedInterval);
     overReplicatedProcessor =
@@ -781,8 +783,16 @@ int processOverReplicatedContainer(
         containerReplicaPendingOps.getPendingOps(containerID);
     final boolean isEC = isEC(result.getContainerInfo().getReplicationConfig());
-    final UnhealthyReplicationHandler handler = isEC ? ecOverReplicationHandler
-        : ratisOverReplicationHandler;
+    UnhealthyReplicationHandler handler;
+    if (isEC) {
+      handler = ecOverReplicationHandler;
+    } else {
+      if (QuasiClosedStuckReplicationCheck.shouldHandleAsQuasiClosedStuck(result.getContainerInfo(), replicas)) {
+        handler = quasiClosedStuckOverReplicationHandler;
+      } else {
+        handler = ratisOverReplicationHandler;
+      }
+    }
 
     return handler.processAndSendCommands(replicas, pendingOps, result,
         getRemainingMaintenanceRedundancy(isEC));
 
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
index 93c670e3ab4f..444affca7db9 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationTestUtil.java
@@ -504,9 +504,29 @@ public static void mockRMSendDeleteCommand(ReplicationManager mock,
    * @param commandsSent Set to add the command to rather than sending it.
    */
   public static void mockRMSendThrottledDeleteCommand(ReplicationManager mock,
-      Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent)
+      Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent)
+      throws NotLeaderException, CommandTargetOverloadedException {
+    mockRMSendThrottledDeleteCommand(mock, commandsSent, new AtomicBoolean(false));
+  }
+
+  /**
+   * Given a Mockito mock of ReplicationManager, this method will mock the
+   * sendThrottledDeleteCommand method so that it adds the command created to
+   * the commandsSent set.
+   * @param mock Mock of ReplicationManager
+   * @param commandsSent Set to add the command to rather than sending it.
+   * @param throwOverloaded If the atomic boolean is true, throw a
+   *                        CommandTargetOverloadedException and set the boolean
+   *                        to false, instead of creating the delete command.
+   */
+  public static void mockRMSendThrottledDeleteCommand(ReplicationManager mock,
+      Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent, AtomicBoolean throwOverloaded)
       throws NotLeaderException, CommandTargetOverloadedException {
     doAnswer((Answer<Void>) invocationOnMock -> {
+      if (throwOverloaded.get()) {
+        throwOverloaded.set(false);
+        throw new CommandTargetOverloadedException("Overloaded");
+      }
       ContainerInfo containerInfo = invocationOnMock.getArgument(0);
       int replicaIndex = invocationOnMock.getArgument(1);
       DatanodeDetails target = invocationOnMock.getArgument(2);
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java
new file mode 100644
index 000000000000..20f6de21b03e
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestQuasiClosedStuckOverReplicationHandler.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.scm.container.replication;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerReplica;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.ratis.protocol.exceptions.NotLeaderException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Test for QuasiClosedStuckOverReplicationHandler.
+ */
+public class TestQuasiClosedStuckOverReplicationHandler {
+
+  private static final RatisReplicationConfig RATIS_REPLICATION_CONFIG = RatisReplicationConfig.getInstance(THREE);
+  private ContainerInfo container;
+  private ReplicationManager replicationManager;
+  private ReplicationManagerMetrics metrics;
+  private Set<Pair<DatanodeDetails, SCMCommand<?>>> commandsSent;
+  private QuasiClosedStuckOverReplicationHandler handler;
+  private UUID origin1 = UUID.randomUUID();
+  private UUID origin2 = UUID.randomUUID();
+
+  @BeforeEach
+  void setup() throws NodeNotFoundException,
+      CommandTargetOverloadedException, NotLeaderException {
+    container = ReplicationTestUtil.createContainer(
+        HddsProtos.LifeCycleState.QUASI_CLOSED, RATIS_REPLICATION_CONFIG);
+
+    replicationManager = mock(ReplicationManager.class);
+    OzoneConfiguration ozoneConfiguration = new OzoneConfiguration();
+    ozoneConfiguration.setBoolean("hdds.scm.replication.push", true);
+    when(replicationManager.getConfig())
+        .thenReturn(ozoneConfiguration.getObject(
+            ReplicationManager.ReplicationManagerConfiguration.class));
+    metrics = ReplicationManagerMetrics.create(replicationManager);
+    when(replicationManager.getMetrics()).thenReturn(metrics);
+
+    /*
+      Return NodeStatus with NodeOperationalState as specified in
+      DatanodeDetails, and NodeState as HEALTHY.
+     */
+    when(
+        replicationManager.getNodeStatus(any(DatanodeDetails.class)))
+        .thenAnswer(invocationOnMock -> {
+          DatanodeDetails dn = invocationOnMock.getArgument(0);
+          return new NodeStatus(dn.getPersistedOpState(),
+              HddsProtos.NodeState.HEALTHY);
+        });
+
+    commandsSent = new HashSet<>();
+    ReplicationTestUtil.mockRMSendThrottledDeleteCommand(
+        replicationManager, commandsSent);
+    handler = new QuasiClosedStuckOverReplicationHandler(replicationManager);
+  }
+
+  @Test
+  public void testReturnsZeroIfNotOverReplicated() throws IOException {
+    Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(),
+        StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED,
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE));
+
+    int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1);
+    assertEquals(0, count);
+  }
+
+  @Test
+  public void testNoCommandsScheduledIfPendingOps() throws IOException {
+    Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(),
+        StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED,
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE));
+    List<ContainerReplicaOp> pendingOps = new ArrayList<>();
+    pendingOps.add(ContainerReplicaOp.create(
+        ContainerReplicaOp.PendingOpType.DELETE, MockDatanodeDetails.randomDatanodeDetails(), 0));
+
+    int count = handler.processAndSendCommands(replicas, pendingOps, getOverReplicatedHealthResult(), 1);
+    assertEquals(0, count);
+  }
+
+  @Test
+  public void testCommandScheduledForOverReplicatedContainer() throws IOException {
+    Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(),
+        StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED,
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE));
+
+    int count = handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1);
+    assertEquals(1, count);
+    SCMCommand<?> command = commandsSent.iterator().next().getRight();
+    assertEquals(StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type.deleteContainerCommand, command.getType());
+  }
+
+  @Test
+  public void testOverloadedExceptionContinuesAndThrows() throws NotLeaderException, CommandTargetOverloadedException {
+    Set<ContainerReplica> replicas = ReplicationTestUtil.createReplicasWithOriginAndOpState(container.containerID(),
+        StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.QUASI_CLOSED,
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin1, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE),
+        Pair.of(origin2, HddsProtos.NodeOperationalState.IN_SERVICE));
+
+    ReplicationTestUtil.mockRMSendThrottledDeleteCommand(replicationManager, commandsSent, new AtomicBoolean(true));
+
+    assertThrows(CommandTargetOverloadedException.class, () ->
+        handler.processAndSendCommands(replicas, Collections.emptyList(), getOverReplicatedHealthResult(), 1));
+    assertEquals(1, commandsSent.size());
+  }
+
+
+  private ContainerHealthResult.OverReplicatedHealthResult getOverReplicatedHealthResult() {
+    ContainerHealthResult.OverReplicatedHealthResult
+        healthResult = mock(ContainerHealthResult.OverReplicatedHealthResult.class);
+    when(healthResult.getContainerInfo()).thenReturn(container);
+    return healthResult;
+  }
+
+}
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java
index 6484fc3b976b..79f6299bace2 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManagerScenarios.java
@@ -130,6 +130,7 @@ private static List<URI> getTestFiles() throws URISyntaxException {
 
   private static List<Scenario> loadTestsInFile(URI testFile) throws IOException {
+    System.out.println("Loading test file: " + testFile);
     ObjectReader reader = new ObjectMapper().readerFor(Scenario.class);
     try (InputStream stream = testFile.toURL().openStream()) {
       try (MappingIterator<Scenario> iterator = reader.readValues(stream)) {
@@ -332,11 +333,10 @@ private void assertExpectedCommands(Scenario scenario,
       boolean found = false;
       for (Pair<UUID, SCMCommand<?>> command : commandsSent) {
         if (command.getRight().getType() == expectedCommand.getType()) {
-          DatanodeDetails targetDatanode = expectedCommand.getTargetDatanode();
-          if (targetDatanode != null) {
+          if (expectedCommand.hasExpectedDatanode()) {
             // We need to assert against the command the datanode is sent to
             DatanodeDetails commandDatanode = findDatanodeFromUUID(command.getKey());
-            if (commandDatanode != null && commandDatanode.equals(targetDatanode)) {
+            if (commandDatanode != null && expectedCommand.isTargetExpected(commandDatanode)) {
               found = true;
               commandsSent.remove(command);
               break;
@@ -550,6 +550,7 @@ public int getOverReplicatedQueue() {
   public static class ExpectedCommands {
     private SCMCommandProto.Type type;
     private String datanode;
+    private Set<DatanodeDetails> expectedDatanodes;
 
     public void setDatanode(String datanode) {
       this.datanode = datanode;
@@ -564,15 +565,33 @@ public SCMCommandProto.Type getType() {
       return type;
     }
 
-    public DatanodeDetails getTargetDatanode() {
+    public boolean hasExpectedDatanode() {
+      createExpectedDatanodes();
+      return !expectedDatanodes.isEmpty();
+    }
+
+    public boolean isTargetExpected(DatanodeDetails dn) {
+      createExpectedDatanodes();
+      return expectedDatanodes.contains(dn);
+    }
+
+    private void createExpectedDatanodes() {
+      if (expectedDatanodes != null) {
+        return;
+      }
+      this.expectedDatanodes = new HashSet<>();
       if (datanode == null) {
-        return null;
+        return;
       }
-      DatanodeDetails datanodeDetails = DATANODE_ALIASES.get(this.datanode);
-      if (datanodeDetails == null) {
-        fail("Unable to find a datanode for the alias: " + datanode + " in the expected commands.");
+      String[] nodes = datanode.split("\\|");
+      for (String n : nodes) {
+        DatanodeDetails dn = DATANODE_ALIASES.get(n);
+        if (dn != null) {
+          expectedDatanodes.add(dn);
+        } else {
+          fail("Expected datanode not found: " + datanode);
+        }
       }
-      return datanodeDetails;
     }
   }
diff --git a/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json b/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json
index 8b841635b730..3e6217d5dc88 100644
--- a/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json
+++ b/hadoop-hdds/server-scm/src/test/resources/replicationManagerTests/quasi_closed.json
@@ -179,7 +179,90 @@
     "expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
     "checkCommands": [],
     "commands": [
-      { "type": "replicateContainerCommand"}
+      { "type": "replicateContainerCommand", "datanode": "d4|d5" }]
+  },
+  { "description": "Quasi-Closed stuck one origin over replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o1"}
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": [
+      { "type": "deleteContainerCommand", "datanode": "d1|d2|d3|d4" }
+    ]
+  },
+  { "description": "Quasi-Closed stuck two origins over replicated", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2"}
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": [
+      { "type": "deleteContainerCommand", "datanode": "d1|d2|d3" },
+      { "type": "deleteContainerCommand", "datanode": "d4|d5|d6" }
+    ]
+  },
+  { "description": "Quasi-Closed stuck two origins not over replicated with maintenance", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "IN_MAINTENANCE" },
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "IN_MAINTENANCE"}
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": []
+  },
+  { "description": "Quasi-Closed stuck two origins not over replicated with decommission", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "DECOMMISSIONED" },
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "DECOMMISSIONED"}
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": []
+  },
+  { "description": "Quasi-Closed stuck two origins over replicated with maintenance", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o1", "operationalState": "IN_MAINTENANCE" },
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d7", "sequenceId": 10, "isEmpty": false, "origin": "o2", "operationalState": "IN_MAINTENANCE"}
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": [
+      { "type": "deleteContainerCommand", "datanode": "d1|d2|d3" }
+    ]
+  },
+  { "description": "Quasi-Closed stuck two origins over replicated with stale", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
+    "replicas": [
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o1", "healthState": "STALE" },
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d4", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d5", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
+      { "state": "QUASI_CLOSED", "index": 0, "datanode": "d6", "sequenceId": 10, "isEmpty": false, "origin": "o2", "healthState": "STALE" }
+    ],
+    "expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 1, "overReplicatedQueue": 1, "quasiClosedStuck": 1, "unhealthy": 0 },
+    "checkCommands": [],
+    "commands": []
   }
 ]