From fcede695c80c9b39031ca43343b2f7fc21e2da1a Mon Sep 17 00:00:00 2001 From: baoloongmao Date: Mon, 21 Sep 2020 17:42:51 +0800 Subject: [PATCH 1/4] HDDS-4263. ReplicationManager shouldn't retain one healthy replica per origin node Id. --- .../hadoop/hdds/scm/container/ReplicationManager.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index 6b1e2afc52bc..ea461dcf7330 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -24,12 +24,10 @@ import java.util.Collections; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; -import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Consumer; import java.util.function.Predicate; @@ -624,17 +622,8 @@ private void handleOverReplicatedContainer(final ContainerInfo container, " is {}, but found {}.", id, replicationFactor, replicationFactor + excess); - final Map uniqueReplicas = - new LinkedHashMap<>(); - - replicas.stream() - .filter(r -> compareState(container.getState(), r.getState())) - .forEach(r -> uniqueReplicas - .putIfAbsent(r.getOriginDatanodeId(), r)); - // Retain one healthy replica per origin node Id. final List eligibleReplicas = new ArrayList<>(replicas); - eligibleReplicas.removeAll(uniqueReplicas.values()); final List unhealthyReplicas = eligibleReplicas .stream() From c934174f68185868342c5fb27eb8280d7fa41c0e Mon Sep 17 00:00:00 2001 From: baoloongmao Date: Mon, 21 Sep 2020 18:55:23 +0800 Subject: [PATCH 2/4] HDDS-4263. 
ReplicationManager shouldn't retain one healthy replica per origin node Id for closed container. --- .../hdds/scm/container/ReplicationManager.java | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index ea461dcf7330..afd72373aca8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -24,10 +24,12 @@ import java.util.Collections; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.StringJoiner; +import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Consumer; import java.util.function.Predicate; @@ -622,9 +624,21 @@ private void handleOverReplicatedContainer(final ContainerInfo container, " is {}, but found {}.", id, replicationFactor, replicationFactor + excess); - // Retain one healthy replica per origin node Id. final List eligibleReplicas = new ArrayList<>(replicas); + final Map uniqueReplicas = + new LinkedHashMap<>(); + + if (container.getState() == LifeCycleState.CLOSED) { + replicas.stream() + .filter(r -> compareState(container.getState(), r.getState())) + .forEach(r -> uniqueReplicas + .putIfAbsent(r.getOriginDatanodeId(), r)); + + // Retain one healthy replica per origin node Id. 
+ eligibleReplicas.removeAll(uniqueReplicas.values()); + } + final List unhealthyReplicas = eligibleReplicas .stream() .filter(r -> !compareState(container.getState(), r.getState())) From 058bb62c1553bf73cb70fe65e0179dd5cd7ba485 Mon Sep 17 00:00:00 2001 From: baoloongmao Date: Mon, 21 Sep 2020 19:16:55 +0800 Subject: [PATCH 3/4] HDDS-4263. ReplicationManager shouldn't retain one healthy replica per origin node Id for closed container. --- .../apache/hadoop/hdds/scm/container/ReplicationManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java index afd72373aca8..d2899fae2232 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java @@ -629,7 +629,7 @@ private void handleOverReplicatedContainer(final ContainerInfo container, final Map uniqueReplicas = new LinkedHashMap<>(); - if (container.getState() == LifeCycleState.CLOSED) { + if (container.getState() != LifeCycleState.CLOSED) { replicas.stream() .filter(r -> compareState(container.getState(), r.getState())) .forEach(r -> uniqueReplicas From 205cc65d6db107725ef304efeb7d45610cad6787 Mon Sep 17 00:00:00 2001 From: baoloongmao Date: Tue, 22 Sep 2020 15:46:31 +0800 Subject: [PATCH 4/4] fix test case to satisfy the new logic --- .../hadoop/hdds/scm/container/TestReplicationManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java index b1e27c0816e8..b11582ab1867 100644 --- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManager.java @@ -817,7 +817,7 @@ public void testOverReplicatedAndPolicyUnSatisfiedAndDeleted() throws final ContainerReplica replicaFour = getReplicas( id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); final ContainerReplica replicaFive = getReplicas( - id, State.CLOSED, 1000L, originNodeId, randomDatanodeDetails()); + id, State.QUASI_CLOSED, 1000L, originNodeId, randomDatanodeDetails()); containerStateManager.loadContainer(container); containerStateManager.updateContainerReplica(id, replicaOne);