diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java index af1287521916..523d6dc18c29 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java @@ -71,18 +71,16 @@ public boolean handle(ContainerCheckRequest request) { // handler so return as unhandled so any further handlers will be tried. return false; } - // TODO - should the report have a HEALTHY state, rather than just bad - // states? It would need to be added to legacy RM too. if (health.getHealthState() == ContainerHealthResult.HealthState.UNDER_REPLICATED) { - report.incrementAndSample( - ReplicationManagerReport.HealthState.UNDER_REPLICATED, containerID); ContainerHealthResult.UnderReplicatedHealthResult underHealth = ((ContainerHealthResult.UnderReplicatedHealthResult) health); if (underHealth.isUnrecoverable()) { - // TODO - do we need a new health state for unrecoverable EC? report.incrementAndSample( ReplicationManagerReport.HealthState.MISSING, containerID); + } else { + report.incrementAndSample( + ReplicationManagerReport.HealthState.UNDER_REPLICATED, containerID); } if (!underHealth.isReplicatedOkAfterPending() && (!underHealth.isUnrecoverable() diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java index b1ef86f70965..53a30f03c680 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java @@ -89,9 +89,6 @@ public boolean handle(ContainerCheckRequest request) { return false; } - report.incrementAndSample( - ReplicationManagerReport.HealthState.UNDER_REPLICATED, - container.containerID()); LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending" + " is [{}]. isUnrecoverable is [{}]. hasHealthyReplicas is [{}].", container, @@ -103,6 +100,10 @@ public boolean handle(ContainerCheckRequest request) { container.containerID()); return true; } + report.incrementAndSample( + ReplicationManagerReport.HealthState.UNDER_REPLICATED, + container.containerID()); + if (!underHealth.isReplicatedOkAfterPending() && underHealth.hasHealthyReplicas()) { request.getReplicationQueue().enqueue(underHealth); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index b43675319571..8d9652048853 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -430,7 +430,7 @@ public void testUnderReplicatedAndUnrecoverable() // replication list. It will be checked again on the next RM run. Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repReport.getStat( + Assert.assertEquals(0, repReport.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, repReport.getStat( ReplicationManagerReport.HealthState.MISSING)); @@ -492,7 +492,7 @@ public void testUnrecoverableClosedContainerWithUnhealthyReplicas() Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, repReport.getStat( + Assert.assertEquals(0, repReport.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, repReport.getStat( ReplicationManagerReport.HealthState.MISSING)); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java index c3fe3664ed3e..850e975b22ea 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestECReplicationCheckHandler.java @@ -271,7 +271,7 @@ public void testUnderReplicatedAndUnrecoverable() { // Unrecoverable so not added to the queue Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, report.getStat( + Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.MISSING)); @@ -310,7 +310,7 @@ private void testUnderReplicatedAndUnrecoverableWithOffline( // Unrecoverable so not added to the queue Assert.assertEquals(1, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, report.getStat( + Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.MISSING)); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java index deaecb033534..ca227cc16473 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java @@ -290,7 +290,7 @@ public void testUnderReplicatedAndUnrecoverable() { // Unrecoverable, so not added to the queue. Assert.assertEquals(0, repQueue.underReplicatedQueueSize()); Assert.assertEquals(0, repQueue.overReplicatedQueueSize()); - Assert.assertEquals(1, report.getStat( + Assert.assertEquals(0, report.getStat( ReplicationManagerReport.HealthState.UNDER_REPLICATED)); Assert.assertEquals(1, report.getStat( ReplicationManagerReport.HealthState.MISSING));