Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,24 @@ public ECContainerReplicaCount(ContainerInfo containerInfo,
this.remainingMaintenanceRedundancy
= Math.min(repConfig.getParity(), remainingMaintenanceRedundancy);

Set<DatanodeDetails> unhealthyReplicaDNs = new HashSet<>();
for (ContainerReplica r : replicas) {
if (r.getState() == ContainerReplicaProto.State.UNHEALTHY) {
unhealthyReplicaDNs.add(r.getDatanodeDetails());
}
}

for (ContainerReplicaOp op : replicaPendingOps) {
if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
pendingAdd.add(op.getReplicaIndex());
} else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
pendingDelete.add(op.getReplicaIndex());
if (!unhealthyReplicaDNs.contains(op.getTarget())) {
// We ignore unhealthy replicas later in this method, so we also
// need to ignore pending deletes on those unhealthy replicas,
// otherwise the pending delete will decrement the healthy count and
// make the container appear under-replicated when it is not.
pendingDelete.add(op.getReplicaIndex());
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;

/**
Expand Down Expand Up @@ -612,4 +613,38 @@ public void testSufficientlyReplicatedForOffline() {
Assertions.assertFalse(rcnt.isSufficientlyReplicatedForOffline(
inServiceReplica.getDatanodeDetails()));
}

/**
 * Checks how pending deletes interact with an UNHEALTHY replica: a pending
 * delete aimed at the unhealthy replica's datanode must leave the container
 * sufficiently replicated, whereas an additional pending delete for index 2
 * on a different datanode must not.
 */
@Test
public void testSufficientlyReplicatedWithUnhealthyAndPendingDelete() {
  Set<ContainerReplica> allReplicas = ReplicationTestUtil.createReplicas(
      Pair.of(IN_SERVICE, 1), Pair.of(IN_SERVICE, 2),
      Pair.of(IN_SERVICE, 3), Pair.of(IN_SERVICE, 4),
      Pair.of(IN_SERVICE, 5));

  // Extra UNHEALTHY replica created with index 1, added next to the set
  // built above.
  ContainerReplica badCopy = ReplicationTestUtil.createContainerReplica(
      container.containerID(), 1, IN_SERVICE, UNHEALTHY);
  allReplicas.add(badCopy);

  // The only pending op so far is a delete of that unhealthy replica.
  ContainerReplicaOp deleteBadCopy = ContainerReplicaOp.create(
      ContainerReplicaOp.PendingOpType.DELETE,
      badCopy.getDatanodeDetails(), badCopy.getReplicaIndex());
  List<ContainerReplicaOp> ops = new ArrayList<>();
  ops.add(deleteBadCopy);

  ECContainerReplicaCount count =
      new ECContainerReplicaCount(container, allReplicas, ops, 1);
  Assertions.assertTrue(count.isSufficientlyReplicated(false));

  // Now also queue a delete for index 2 on a random datanode, i.e. a
  // delete that does not target the unhealthy replica.
  ContainerReplicaOp deleteIndexTwo = ContainerReplicaOp.create(
      ContainerReplicaOp.PendingOpType.DELETE,
      MockDatanodeDetails.randomDatanodeDetails(), 2);
  ops.add(deleteIndexTwo);

  // Build a fresh count from the enlarged pending-op list.
  count = new ECContainerReplicaCount(container, allReplicas, ops, 1);
  Assertions.assertFalse(count.isSufficientlyReplicated(false));
  Assertions.assertEquals(2, count.unavailableIndexes(false).get(0));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY;
import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.ADD;
import static org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
Expand Down Expand Up @@ -693,4 +694,39 @@ public void testOverAndMisReplicatedContainer() {
ReplicationManagerReport.HealthState.MIS_REPLICATED));
}

/**
 * Builds a container with replicas for indexes 1 to 5 plus an additional
 * UNHEALTHY replica for index 1 that has a pending delete, then expects
 * the health check to report HEALTHY, decline to handle the request, and
 * leave both replication queues and both report counters at zero.
 */
@Test
public void testUnhealthyReplicaWithOtherCopyAndPendingDelete() {
  ContainerInfo container = createContainerInfo(repConfig);
  Set<ContainerReplica> allReplicas = createReplicas(
      container.containerID(),
      Pair.of(IN_SERVICE, 1),
      Pair.of(IN_SERVICE, 2),
      Pair.of(IN_SERVICE, 3),
      Pair.of(IN_SERVICE, 4),
      Pair.of(IN_SERVICE, 5));

  // Second replica for index 1, this one in the UNHEALTHY state.
  ContainerReplica badCopy = ReplicationTestUtil.createContainerReplica(
      container.containerID(), 1, IN_SERVICE, UNHEALTHY);
  allReplicas.add(badCopy);

  // A delete of the unhealthy replica is already pending.
  ContainerReplicaOp deleteBadCopy = ContainerReplicaOp.create(
      DELETE, badCopy.getDatanodeDetails(), badCopy.getReplicaIndex());
  List<ContainerReplicaOp> ops = new ArrayList<>();
  ops.add(deleteBadCopy);

  ContainerCheckRequest checkRequest = requestBuilder
      .setContainerReplicas(allReplicas)
      .setContainerInfo(container)
      .setPendingOps(ops)
      .build();

  ContainerHealthResult outcome = healthCheck.checkHealth(checkRequest);
  Assert.assertEquals(HealthState.HEALTHY, outcome.getHealthState());

  // The handler must report the request as not handled and must not have
  // queued or counted anything.
  Assert.assertFalse(healthCheck.handle(checkRequest));
  Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
  Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
  Assert.assertEquals(0,
      report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED));
  Assert.assertEquals(0,
      report.getStat(ReplicationManagerReport.HealthState.OVER_REPLICATED));
}

}