diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java index a2139707ca24..1cd9c37e29f9 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java @@ -46,6 +46,6 @@ public LegacyRatisContainerReplicaCount(ContainerInfo container, @Override protected int healthyReplicaCountAdapter() { - return 0; + return -getMisMatchedReplicaCount(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java index ca30d2d5b86a..e65ac74ed1e7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java @@ -19,11 +19,13 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerReplica; import java.util.ArrayList; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -44,10 +46,11 @@ public class RatisContainerReplicaCount implements ContainerReplicaCount { private int healthyReplicaCount; private int unhealthyReplicaCount; + private int misMatchedReplicaCount; private int decommissionCount; private int maintenanceCount; - private final int inFlightAdd; - private final int inFlightDel; + private int inFlightAdd; + private int inFlightDel; private final int repFactor; private final int minHealthyForMaintenance; private final ContainerInfo container; @@ -60,6 +63,7 @@ public RatisContainerReplicaCount(ContainerInfo container, int minHealthyForMaintenance) { this.unhealthyReplicaCount = 0; this.healthyReplicaCount = 0; + this.misMatchedReplicaCount = 0; this.decommissionCount = 0; this.maintenanceCount = 0; this.inFlightAdd = inFlightAdd; @@ -84,11 +88,86 @@ public RatisContainerReplicaCount(ContainerInfo container, } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { maintenanceCount++; } else { - if (LegacyReplicationManager.compareState(container.getState(), + if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) { + unhealthyReplicaCount++; + } else if (!LegacyReplicationManager.compareState(container.getState(), cr.getState())) { healthyReplicaCount++; + misMatchedReplicaCount++; } else { + healthyReplicaCount++; + } + } + } + } + + public RatisContainerReplicaCount(ContainerInfo containerInfo, + Set replicas, + List replicaPendingOps, + int minHealthyForMaintenance) { + // Iterate replicas in deterministic order to avoid potential data loss + // on delete. + // See https://issues.apache.org/jira/browse/HDDS-4589. + // N.B., sort replicas by (containerID, datanodeDetails). + this.replicas = replicas.stream() + .sorted(Comparator.comparingLong(ContainerReplica::hashCode)) + .collect(Collectors.toList()); + + this.container = containerInfo; + this.repFactor = containerInfo.getReplicationFactor().getNumber(); + this.minHealthyForMaintenance + = Math.min(this.repFactor, minHealthyForMaintenance); + + this.unhealthyReplicaCount = 0; + this.healthyReplicaCount = 0; + this.misMatchedReplicaCount = 0; + this.decommissionCount = 0; + this.maintenanceCount = 0; + this.inFlightAdd = 0; + this.inFlightDel = 0; + + // collect DNs that have UNHEALTHY replicas + Set unhealthyReplicaDNs = new HashSet<>(); + for (ContainerReplica r : replicas) { + if (r.getState() == ContainerReplicaProto.State.UNHEALTHY) { + unhealthyReplicaDNs.add(r.getDatanodeDetails()); + } + } + + // count pending adds and deletes + for (ContainerReplicaOp op : replicaPendingOps) { + if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) { + inFlightAdd++; + } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) { + if (!unhealthyReplicaDNs.contains(op.getTarget())) { + /* + We don't count UNHEALTHY replicas when considering sufficient + replication, so we also need to ignore pending deletes on + those unhealthy replicas, otherwise the pending delete will + decrement the healthy count and make the container appear + under-replicated when it is not. + */ + inFlightDel++; + } + } + } + + for (ContainerReplica cr : this.replicas) { + HddsProtos.NodeOperationalState state = + cr.getDatanodeDetails().getPersistedOpState(); + if (state == DECOMMISSIONED || state == DECOMMISSIONING) { + decommissionCount++; + } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { + maintenanceCount++; + } else { + if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) { unhealthyReplicaCount++; + } else if (!LegacyReplicationManager.compareState(container.getState(), + cr.getState())) { + healthyReplicaCount++; + misMatchedReplicaCount++; + } else { + healthyReplicaCount++; } } } @@ -102,15 +181,20 @@ public int getUnhealthyReplicaCount() { return unhealthyReplicaCount; } + public int getMisMatchedReplicaCount() { + return misMatchedReplicaCount; + } + /** - * The new replication manager currently counts unhealthy and healthy - * replicas together. This should be updated when changes from HDDS-6447 - * are integrated into the new replication manager. See - * {@link LegacyRatisContainerReplicaCount}, which overrides this method, for - * details. + * The new replication manager now does not consider replicas with + * UNHEALTHY state when counting sufficient replication. This method is + * overridden to ensure LegacyReplicationManager works as intended in + * HDDS-6447. + * See {@link LegacyRatisContainerReplicaCount}, which overrides this + * method, for details. */ protected int healthyReplicaCountAdapter() { - return getUnhealthyReplicaCount(); + return 0; } @Override diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java index 7d4947dd64d6..0363af0e4e62 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.scm.container.replication; +import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -26,7 +27,10 @@ import org.junit.Assert; import org.junit.jupiter.api.Test; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Set; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; @@ -35,7 +39,12 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSING; import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.OPEN; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica; +import static org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -412,6 +421,43 @@ void testIsHealthyWithDifferentReplicaStateNotHealthy() { assertFalse(rcnt.isHealthy()); } + @Test + void testSufficientReplicationWithMismatchedReplicaState() { + ContainerInfo container = + createContainerInfo(RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.THREE), 1L, + HddsProtos.LifeCycleState.CLOSED); + Set replicas = + createReplicas(ContainerID.valueOf(1L), CLOSED, 0, 0); + replicas.add(createContainerReplica(ContainerID.valueOf(1L), 0, + IN_SERVICE, CLOSING)); + + RatisContainerReplicaCount rcnt = + new RatisContainerReplicaCount(container, replicas, + Collections.emptyList(), 2); + assertTrue(rcnt.isSufficientlyReplicated()); + } + + @Test + void testSufficientReplicationWithPendingDeleteOnUnhealthyReplica() { + ContainerInfo container = + createContainerInfo(RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.THREE), 1L, + HddsProtos.LifeCycleState.CLOSED); + Set replicas = + createReplicas(ContainerID.valueOf(1L), CLOSED, 0, 0, 0); + ContainerReplica unhealthyReplica = createContainerReplica( + ContainerID.valueOf(1L), 0, IN_SERVICE, UNHEALTHY); + replicas.add(unhealthyReplica); + + List ops = new ArrayList<>(); + ops.add(ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.DELETE, + unhealthyReplica.getDatanodeDetails(), 0)); + RatisContainerReplicaCount replicaCount = + new RatisContainerReplicaCount(container, replicas, ops, 2); + assertTrue(replicaCount.isSufficientlyReplicated()); + } + @Test void testIsHealthyWithMaintReplicaIsHealthy() { Set replica = diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java index 0bf0f1d7464f..5b0b762307f9 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java @@ -149,31 +149,6 @@ public void testOverReplicatedQuasiClosedContainerWithDifferentOrigins() getOverReplicatedHealthResult(), 0); } - /** - * When a quasi closed container is over replicated, the handler should - * prioritize creating delete commands for unhealthy replicas over quasi - * closed replicas. - */ - @Test - public void testOverReplicatedQuasiClosedContainerWithUnhealthyReplica() - throws IOException { - container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED, - RATIS_REPLICATION_CONFIG); - Set replicas = - createReplicasWithSameOrigin(container.containerID(), - ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0); - ContainerReplica unhealthyReplica = - createContainerReplica(container.containerID(), 0, - HddsProtos.NodeOperationalState.IN_SERVICE, - ContainerReplicaProto.State.UNHEALTHY); - replicas.add(unhealthyReplica); - - Map> commands = testProcessing(replicas, - Collections.emptyList(), getOverReplicatedHealthResult(), 1); - Assert.assertTrue( - commands.containsKey(unhealthyReplica.getDatanodeDetails())); - } - /** * Handler should not create any delete commands if removing a replica * makes the container mis replicated.