diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java index 5d80268ef4fc..6393f0349d43 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java @@ -113,7 +113,15 @@ public static class UnderReplicatedHealthResult public UnderReplicatedHealthResult(ContainerInfo containerInfo, int remainingRedundancy, boolean dueToDecommission, boolean replicatedOkWithPending, boolean unrecoverable) { - super(containerInfo, HealthState.UNDER_REPLICATED); + this(containerInfo, remainingRedundancy, dueToDecommission, + replicatedOkWithPending, unrecoverable, HealthState.UNDER_REPLICATED); + } + + protected UnderReplicatedHealthResult(ContainerInfo containerInfo, + int remainingRedundancy, boolean dueToDecommission, + boolean replicatedOkWithPending, boolean unrecoverable, + HealthState healthState) { + super(containerInfo, healthState); this.remainingRedundancy = remainingRedundancy; this.dueToDecommission = dueToDecommission; this.sufficientlyReplicatedAfterPending = replicatedOkWithPending; @@ -148,7 +156,7 @@ public int getWeightedRedundancy() { if (dueToDecommission) { result += DECOMMISSION_REDUNDANCY; } else { - result += remainingRedundancy; + result += getRemainingRedundancy(); } return result; } @@ -207,19 +215,28 @@ public boolean isUnrecoverable() { * containers are not spread across enough racks. */ public static class MisReplicatedHealthResult - extends ContainerHealthResult { + extends UnderReplicatedHealthResult { - private final boolean replicatedOkAfterPending; + /** + * In UnderReplicatedHealthState, DECOMMISSION_REDUNDANCY is defined as + * 5 so that containers which are really under replicated get fixed as a + * priority over decommissioning hosts. We have defined that a container + * can only be mis replicated if it is not over or under replicated. Fixing + * mis replication is arguably less important than competing a decommission. + * So as a lot of mis replicated container do not block decommission, we + * set the redundancy of mis replicated containers to 6 so they sort after + * under / over replicated and decommissioning replicas in the under + * replication queue. + */ + private static final int MIS_REP_REDUNDANCY = 6; public MisReplicatedHealthResult(ContainerInfo containerInfo, boolean replicatedOkAfterPending) { - super(containerInfo, HealthState.MIS_REPLICATED); - this.replicatedOkAfterPending = replicatedOkAfterPending; + super(containerInfo, MIS_REP_REDUNDANCY, false, + replicatedOkAfterPending, false, + HealthState.MIS_REPLICATED); } - public boolean isReplicatedOkAfterPending() { - return replicatedOkAfterPending; - } } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index 351eec71781d..5fd0ad2ea2e5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -159,6 +159,7 @@ public class ReplicationManager implements SCMService { private ReplicationQueue replicationQueue; private final ECUnderReplicationHandler ecUnderReplicationHandler; private final ECOverReplicationHandler ecOverReplicationHandler; + private final ECMisReplicationHandler ecMisReplicationHandler; private final RatisUnderReplicationHandler ratisUnderReplicationHandler; private final RatisOverReplicationHandler ratisOverReplicationHandler; private final int maintenanceRedundancy; @@ -223,6 +224,8 @@ public ReplicationManager(final ConfigurationSource conf, ecContainerPlacement, conf, nodeManager, this); ecOverReplicationHandler = new ECOverReplicationHandler(ecContainerPlacement, nodeManager); + ecMisReplicationHandler = new ECMisReplicationHandler(ecContainerPlacement, + conf, nodeManager); ratisUnderReplicationHandler = new RatisUnderReplicationHandler( ratisContainerPlacement, conf, nodeManager); ratisOverReplicationHandler = @@ -525,8 +528,18 @@ public Map> processUnderReplicatedContainer( List pendingOps = containerReplicaPendingOps.getPendingOps(containerID); if (result.getContainerInfo().getReplicationType() == EC) { - return ecUnderReplicationHandler.processAndCreateCommands(replicas, - pendingOps, result, maintenanceRedundancy); + if (result.getHealthState() + == ContainerHealthResult.HealthState.UNDER_REPLICATED) { + return ecUnderReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, maintenanceRedundancy); + } else if (result.getHealthState() + == ContainerHealthResult.HealthState.MIS_REPLICATED) { + return ecMisReplicationHandler.processAndCreateCommands(replicas, + pendingOps, result, maintenanceRedundancy); + } else { + throw new IllegalArgumentException("Unexpected health state: " + + result.getHealthState()); + } } return ratisUnderReplicationHandler.processAndCreateCommands(replicas, pendingOps, result, ratisMaintenanceMinReplicas); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java index a14d21c76c31..d27c1d9c6106 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationQueue.java @@ -32,8 +32,6 @@ public class ReplicationQueue { underRepQueue; private final Queue overRepQueue; - private final Queue - misRepQueue; public ReplicationQueue() { underRepQueue = new PriorityQueue<>( @@ -42,7 +40,6 @@ public ReplicationQueue() { .thenComparing(ContainerHealthResult .UnderReplicatedHealthResult::getRequeueCount)); overRepQueue = new LinkedList<>(); - misRepQueue = new LinkedList<>(); } public void enqueue(ContainerHealthResult.UnderReplicatedHealthResult @@ -55,11 +52,6 @@ public void enqueue(ContainerHealthResult.OverReplicatedHealthResult overRepQueue.add(overReplicatedHealthResult); } - public void enqueue(ContainerHealthResult.MisReplicatedHealthResult - misReplicatedHealthResult) { - misRepQueue.add(misReplicatedHealthResult); - } - public ContainerHealthResult.UnderReplicatedHealthResult dequeueUnderReplicatedContainer() { return underRepQueue.poll(); @@ -70,11 +62,6 @@ public void enqueue(ContainerHealthResult.MisReplicatedHealthResult return overRepQueue.poll(); } - public ContainerHealthResult.MisReplicatedHealthResult - dequeueMisReplicatedContainer() { - return misRepQueue.poll(); - } - public int underReplicatedQueueSize() { return underRepQueue.size(); } @@ -83,8 +70,4 @@ public int overReplicatedQueueSize() { return overRepQueue.size(); } - public int misReplicatedQueueSize() { - return misRepQueue.size(); - } - } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index f537ec5628ae..dd4a9e385ccb 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -390,6 +390,13 @@ public void testOverReplicatedFixByPending() @Test public void testUnderReplicationQueuePopulated() { + // Make it always return mis-replicated. Only a perfectly replicated + // container should make it the mis-replicated state as under / over + // replicated take precedence. + Mockito.when(ecPlacementPolicy.validateContainerPlacement( + anyList(), anyInt())) + .thenReturn(new ContainerPlacementStatusDefault(1, 2, 3)); + ContainerInfo decomContainer = createContainerInfo(repConfig, 1, HddsProtos.LifeCycleState.CLOSED); addReplicas(decomContainer, ContainerReplicaProto.State.CLOSED, @@ -404,6 +411,10 @@ public void testUnderReplicationQueuePopulated() { HddsProtos.LifeCycleState.CLOSED); addReplicas(underRep0, ContainerReplicaProto.State.CLOSED, 1, 2, 3); + ContainerInfo misRep = createContainerInfo(repConfig, 4, + HddsProtos.LifeCycleState.CLOSED); + addReplicas(misRep, ContainerReplicaProto.State.CLOSED, 1, 2, 3, 4, 5); + enableProcessAll(); replicationManager.processAll(); @@ -438,6 +449,10 @@ public void testUnderReplicationQueuePopulated() { res = replicationManager.dequeueUnderReplicatedContainer(); Assert.assertEquals(underRep0, res.getContainerInfo()); + // Next is the mis-rep container, which has a remaining redundancy of 6. + res = replicationManager.dequeueUnderReplicatedContainer(); + Assert.assertEquals(misRep, res.getContainerInfo()); + res = replicationManager.dequeueUnderReplicatedContainer(); Assert.assertNull(res); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java index 06f68ee91cee..8b7aa665c8b6 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java @@ -481,9 +481,8 @@ public void testMisReplicatedContainer() { Assert.assertEquals(HealthState.MIS_REPLICATED, result.getHealthState()); Assert.assertTrue(healthCheck.handle(request)); - Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -531,7 +530,6 @@ public void testMisReplicatedContainerFixedByPending() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -567,7 +565,6 @@ public void testUnderAndMisReplicatedContainer() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -604,7 +601,6 @@ public void testOverAndMisReplicatedContainer() { Assert.assertTrue(healthCheck.handle(request)); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(1, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java index f6493b7817c4..9cc2c6263d44 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java @@ -423,7 +423,6 @@ public void testUnderReplicatedWithMisReplication() { Assert.assertTrue(healthCheck.handle(requestBuilder.build())); Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -468,7 +467,6 @@ public void testUnderReplicatedWithMisReplicationFixedByPending() { Assert.assertTrue(healthCheck.handle(requestBuilder.build())); Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(0, repQueue.misReplicatedQueueSize()); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(0, report.getStat( @@ -494,9 +492,8 @@ public void testMisReplicated() { Assert.assertFalse(result.isReplicatedOkAfterPending()); Assert.assertTrue(healthCheck.handle(requestBuilder.build())); - Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); + Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repQueue.misReplicatedQueueSize()); Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat(