Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,17 +99,33 @@ private boolean canForceCloseContainer(final ContainerInfo container,
final Set<ContainerReplica> replicas) {
final int replicationFactor =
container.getReplicationConfig().getRequiredNodes();
final long uniqueQuasiClosedReplicaCount = replicas.stream()
.filter(r -> r.getState() == State.QUASI_CLOSED)

final long uniqueQuasiClosedOrUnhealthyReplicaCount = replicas.stream()
.filter(r -> r.getState() == State.QUASI_CLOSED || r.getState() == State.UNHEALTHY)
.map(ContainerReplica::getOriginDatanodeId)
.distinct()
.count();

long maxQCSeq = -1;
long maxUnhealthySeq = -1;
for (ContainerReplica r : replicas) {
if (r.getState() == State.QUASI_CLOSED) {
maxQCSeq = Math.max(maxQCSeq, r.getSequenceId());
} else if (r.getState() == State.UNHEALTHY) {
maxUnhealthySeq = Math.max(maxUnhealthySeq, r.getSequenceId());
}
}

// We can only force close the container if we have seen all the replicas from unique origins.
// Due to unexpected behavior when writing to ratis containers, it is possible for blocks to be committed
// on the ratis leader, but not on the followers. A failure on the leader can result in two replicas
// without the latest transactions, which are then force closed. This can result in data loss.
// Note that if the 3rd replica is permanently lost, the container will be stuck in QUASI_CLOSED state forever.
return uniqueQuasiClosedReplicaCount >= replicationFactor;
// It is possible to CLOSE a container that has one QC and the remaining UNHEALTHY, provided the QC is one of the
// replicas with the highest sequence ID. If an UNHEALTHY replica has a higher sequence ID, the container will
// remain in QUASI_CLOSED state.
return maxQCSeq > -1 && maxQCSeq >= maxUnhealthySeq
&& uniqueQuasiClosedOrUnhealthyReplicaCount >= replicationFactor;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,90 @@ public void testQuasiClosedWithAllUniqueOriginSendsForceClose() {
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
}

/**
* When a container is QUASI_CLOSED with some unhealthy and all 3 are reported with unique
* origins, it should be forced closed.
*/
@Test
public void testQuasiClosedWithAllUniqueOriginAndUnhealthySendsForceClose() {
ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
ratisReplicationConfig, 1, QUASI_CLOSED);
// These 3 replicas will have the same BCSID and unique origin node ids
Set<ContainerReplica> containerReplicas = ReplicationTestUtil
.createReplicas(containerInfo.containerID(),
State.QUASI_CLOSED, 0, 0);
containerReplicas.addAll(ReplicationTestUtil
.createReplicas(containerInfo.containerID(),
State.UNHEALTHY, 0));
ContainerCheckRequest request = new ContainerCheckRequest.Builder()
.setPendingOps(Collections.emptyList())
.setReport(new ReplicationManagerReport())
.setContainerInfo(containerInfo)
.setContainerReplicas(containerReplicas)
.build();
ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
.setPendingOps(Collections.emptyList())
.setReport(new ReplicationManagerReport())
.setContainerInfo(containerInfo)
.setContainerReplicas(containerReplicas)
.setReadOnly(true)
.build();

assertFalse(quasiClosedContainerHandler.handle(request));
assertFalse(quasiClosedContainerHandler.handle(readRequest));
verify(replicationManager, times(2))
.sendCloseContainerReplicaCommand(any(), any(), anyBoolean());
}

/**
* If it's possible to force close replicas then only replicas with the
* highest Sequence ID (also known as BCSID) should be closed.
*/
@Test
public void testQuasiClosedWithUnhealthyHavingHighestSeq() {
final ContainerInfo containerInfo =
getContainer(HddsProtos.LifeCycleState.QUASI_CLOSED);
containerInfo.setUsedBytes(99);
final ContainerID id = containerInfo.containerID();

// create replicas with unique origin DNs
DatanodeDetails dnOne = randomDatanodeDetails();
DatanodeDetails dnTwo = randomDatanodeDetails();
DatanodeDetails dnThree = randomDatanodeDetails();

// 1001 is the highest sequence id
final ContainerReplica replicaOne = getReplicas(
id, State.QUASI_CLOSED, 1000L, dnOne.getUuid(), dnOne);
final ContainerReplica replicaTwo = getReplicas(
id, State.QUASI_CLOSED, 1000L, dnTwo.getUuid(), dnTwo);
final ContainerReplica replicaThree = getReplicas(
id, State.UNHEALTHY, 1001L, dnThree.getUuid(), dnThree);
Set<ContainerReplica> containerReplicas = new HashSet<>();
containerReplicas.add(replicaOne);
containerReplicas.add(replicaTwo);
containerReplicas.add(replicaThree);

ContainerCheckRequest request = new ContainerCheckRequest.Builder()
.setPendingOps(Collections.emptyList())
.setReport(new ReplicationManagerReport())
.setContainerInfo(containerInfo)
.setContainerReplicas(containerReplicas)
.build();
ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
.setPendingOps(Collections.emptyList())
.setReport(new ReplicationManagerReport())
.setContainerInfo(containerInfo)
.setContainerReplicas(containerReplicas)
.setReadOnly(true)
.build();

assertFalse(quasiClosedContainerHandler.handle(request));
assertFalse(quasiClosedContainerHandler.handle(readRequest));
// verify no close commands are sent as the container cannot be closed.
verify(replicationManager, times(0))
.sendCloseContainerReplicaCommand(eq(containerInfo), any(), anyBoolean());
}

/**
* The replicas are QUASI_CLOSED, but all of them have the same origin node
* id. Since all replicas must have unique origin node ids, the handler
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,58 @@
{ "type": "closeContainerCommand", "datanode": "d2" }
],
"commands": []
},
{ "description": "Quasi-Closed with 2 replicas and unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
"replicas": [
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o3"}
],
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0, "unhealthy": 0 },
"checkCommands": [
{ "type": "closeContainerCommand", "datanode": "d1" },
{ "type": "closeContainerCommand", "datanode": "d2" }
],
"commands": [
{ "type": "replicateContainerCommand" }
]
},
{ "description": "Quasi-Closed with 1 replica and two unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 10,
"replicas": [
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
{ "state": "UNHEALTHY", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o3"}
],
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 0, "unhealthy": 0 },
"checkCommands": [
{ "type": "closeContainerCommand", "datanode": "d1" }
],
"commands": [
{ "type": "replicateContainerCommand", "datanode": "d1" },
{ "type": "replicateContainerCommand", "datanode": "d1" }
]
},
{ "description": "Quasi-Closed with 2 replicas and unhealthy where unhealthy is highest BCSID", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11,
"replicas": [
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
{ "state": "UNHEALTHY", "index": 0, "datanode": "d3", "sequenceId": 11, "isEmpty": false, "origin": "o3"}
],
"expectation": { "underReplicated": 1, "underReplicatedQueue": 1, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
"checkCommands": [],
"commands": [
{ "type": "replicateContainerCommand" }
]
},
{ "description": "Quasi-Closed with 3 QC and one unhealthy", "containerState": "QUASI_CLOSED", "replicationConfig": "RATIS:THREE", "sequenceId": 11,
"replicas": [
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d1", "sequenceId": 10, "isEmpty": false, "origin": "o1"},
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d2", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
{ "state": "QUASI_CLOSED", "index": 0, "datanode": "d3", "sequenceId": 10, "isEmpty": false, "origin": "o2"},
{ "state": "UNHEALTHY", "index": 0, "datanode": "d4", "sequenceId": 11, "isEmpty": false, "origin": "o3"}
],
"expectation": { "underReplicated": 0, "underReplicatedQueue": 0, "overReplicated": 0, "overReplicatedQueue": 0, "quasiClosedStuck": 1, "unhealthy": 0 },
"checkCommands": [],
"commands": []
}
]
Loading