-
Notifications
You must be signed in to change notification settings - Fork 587
HDDS-6970. EC: Ensure DatanodeAdminMonitor can handle EC containers during decommission #3573
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
052fae3
HDDS-6970. EC: Ensure DatanodeAdminMonitor can handle EC containers d…
adoroszlai 0cc46ab
Address review comments
adoroszlai 957fada
Address warnings
adoroszlai eff1b3c
Add TODO for remainingRedundancyForMaintenance=0
adoroszlai 2f52e46
Add Jira ID in TODO
adoroszlai 7f4fab3
Merge remote-tracking branch 'origin/master' into HDDS-6970
adoroszlai a98c8fc
Unify isMissing and unRecoverable as isUnrecoverable
adoroszlai File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
262 changes: 262 additions & 0 deletions
262
...cm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerIdenticalReplicaCount.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,262 @@ | ||
| /** | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
| package org.apache.hadoop.hdds.scm.container; | ||
|
|
||
| import org.apache.hadoop.hdds.protocol.proto.HddsProtos; | ||
|
|
||
| import java.util.Set; | ||
|
|
||
| import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; | ||
| import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING; | ||
| import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE; | ||
| import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE; | ||
|
|
||
| /** | ||
| * Immutable object that is created with a set of ContainerReplica objects and | ||
| * the number of in flight replica add and deletes, the container replication | ||
| * factor and the min count which must be available for maintenance. This | ||
| * information can be used to determine if the container is over or under | ||
| * replicated and also how many additional replicas need created or removed. | ||
| */ | ||
| public class ContainerIdenticalReplicaCount implements ContainerReplicaCount { | ||
|
|
||
| private int healthyCount; | ||
| private int decommissionCount; | ||
| private int maintenanceCount; | ||
| private final int inFlightAdd; | ||
| private final int inFlightDel; | ||
| private final int repFactor; | ||
| private final int minHealthyForMaintenance; | ||
| private final ContainerInfo container; | ||
| private final Set<ContainerReplica> replica; | ||
|
|
||
| public ContainerIdenticalReplicaCount(ContainerInfo container, | ||
| Set<ContainerReplica> replica, int inFlightAdd, | ||
| int inFlightDelete, int replicationFactor, | ||
| int minHealthyForMaintenance) { | ||
| this.healthyCount = 0; | ||
| this.decommissionCount = 0; | ||
| this.maintenanceCount = 0; | ||
| this.inFlightAdd = inFlightAdd; | ||
| this.inFlightDel = inFlightDelete; | ||
| this.repFactor = replicationFactor; | ||
| this.replica = replica; | ||
| this.minHealthyForMaintenance | ||
| = Math.min(this.repFactor, minHealthyForMaintenance); | ||
| this.container = container; | ||
|
|
||
| for (ContainerReplica cr : this.replica) { | ||
| HddsProtos.NodeOperationalState state = | ||
| cr.getDatanodeDetails().getPersistedOpState(); | ||
| if (state == DECOMMISSIONED || state == DECOMMISSIONING) { | ||
| decommissionCount++; | ||
| } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) { | ||
| maintenanceCount++; | ||
| } else { | ||
| healthyCount++; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| public int getHealthyCount() { | ||
| return healthyCount; | ||
| } | ||
|
|
||
| @Override | ||
| public int getDecommissionCount() { | ||
| return decommissionCount; | ||
| } | ||
|
|
||
| @Override | ||
| public int getMaintenanceCount() { | ||
| return maintenanceCount; | ||
| } | ||
|
|
||
| public int getReplicationFactor() { | ||
| return repFactor; | ||
| } | ||
|
|
||
| @Override | ||
| public ContainerInfo getContainer() { | ||
| return container; | ||
| } | ||
|
|
||
| @Override | ||
| public Set<ContainerReplica> getReplicas() { | ||
| return replica; | ||
| } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "Container State: " + container.getState() + | ||
| " Replica Count: " + replica.size() + | ||
| " Healthy Count: " + healthyCount + | ||
| " Decommission Count: " + decommissionCount + | ||
| " Maintenance Count: " + maintenanceCount + | ||
| " inFlightAdd Count: " + inFlightAdd + | ||
| " inFightDel Count: " + inFlightDel + | ||
| " ReplicationFactor: " + repFactor + | ||
| " minMaintenance Count: " + minHealthyForMaintenance; | ||
| } | ||
|
|
||
| /** | ||
| * Calculates the delta of replicas which need to be created or removed | ||
| * to ensure the container is correctly replicated when considered inflight | ||
| * adds and deletes. | ||
| * | ||
| * When considering inflight operations, it is assumed any operation will | ||
| * fail. However, to consider the worst case and avoid data loss, we always | ||
| * assume a delete will succeed and and add will fail. In this way, we will | ||
| * avoid scheduling too many deletes which could result in dataloss. | ||
| * | ||
| * Decisions around over-replication are made only on healthy replicas, | ||
| * ignoring any in maintenance and also any inflight adds. InFlight adds are | ||
| * ignored, as they may not complete, so if we have: | ||
| * | ||
| * H, H, H, IN_FLIGHT_ADD | ||
| * | ||
| * And then schedule a delete, we could end up under-replicated (add fails, | ||
| * delete completes). It is better to let the inflight operations complete | ||
| * and then deal with any further over or under replication. | ||
| * | ||
| * For maintenance replicas, assuming replication factor 3, and minHealthy | ||
| * 2, it is possible for all 3 hosts to be put into maintenance, leaving the | ||
| * following (H = healthy, M = maintenance): | ||
| * | ||
| * H, H, M, M, M | ||
| * | ||
| * Even though we are tracking 5 replicas, this is not over replicated as we | ||
| * ignore the maintenance copies. Later, the replicas could look like: | ||
| * | ||
| * H, H, H, H, M | ||
| * | ||
| * At this stage, the container is over replicated by 1, so one replica can be | ||
| * removed. | ||
| * | ||
| * For containers which have replication factor healthy replica, we ignore any | ||
| * inflight add or deletes, as they may fail. Instead, wait for them to | ||
| * complete and then deal with any excess or deficit. | ||
| * | ||
| * For under replicated containers we do consider inflight add and delete to | ||
| * avoid scheduling more adds than needed. There is additional logic around | ||
| * containers with maintenance replica to ensure minHealthyForMaintenance | ||
| * replia are maintained. | ||
| * | ||
| * @return Delta of replicas needed. Negative indicates over replication and | ||
| * containers should be removed. Positive indicates over replication | ||
| * and zero indicates the containers has replicationFactor healthy | ||
| * replica | ||
| */ | ||
| public int additionalReplicaNeeded() { | ||
| int delta = missingReplicas(); | ||
|
|
||
| if (delta < 0) { | ||
| // Over replicated, so may need to remove a container. Do not consider | ||
| // inFlightAdds, as they may fail, but do consider inFlightDel which | ||
| // will reduce the over-replication if it completes. | ||
| // Note this could make the delta positive if there are too many in flight | ||
| // deletes, which will result in an additional being scheduled. | ||
| return delta + inFlightDel; | ||
| } else { | ||
| // May be under or perfectly replicated. | ||
| // We must consider in flight add and delete when calculating the new | ||
| // containers needed, but we bound the lower limit at zero to allow | ||
| // inflight operations to complete before handling any potential over | ||
| // replication | ||
| return Math.max(0, delta - inFlightAdd + inFlightDel); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Returns the count of replicas which need to be created or removed to | ||
| * ensure the container is perfectly replicate. Inflight operations are not | ||
| * considered here, but the logic to determine the missing or excess counts | ||
| * for maintenance is present. | ||
| * | ||
| * Decisions around over-replication are made only on healthy replicas, | ||
| * ignoring any in maintenance. For example, if we have: | ||
| * | ||
| * H, H, H, M, M | ||
| * | ||
| * This will not be consider over replicated until one of the Maintenance | ||
| * replicas moves to Healthy. | ||
| * | ||
| * If the container is perfectly replicated, zero will be return. | ||
| * | ||
| * If it is under replicated a positive value will be returned, indicating | ||
| * how many replicas must be added. | ||
| * | ||
| * If it is over replicated a negative value will be returned, indicating now | ||
| * many replicas to remove. | ||
| * | ||
| * @return Zero if the container is perfectly replicated, a positive value | ||
| * for under replicated and a negative value for over replicated. | ||
| */ | ||
| private int missingReplicas() { | ||
| int delta = repFactor - healthyCount; | ||
|
|
||
| if (delta < 0) { | ||
| // Over replicated, so may need to remove a container. | ||
| return delta; | ||
| } else if (delta > 0) { | ||
| // May be under-replicated, depending on maintenance. | ||
| delta = Math.max(0, delta - maintenanceCount); | ||
| int neededHealthy = | ||
| Math.max(0, minHealthyForMaintenance - healthyCount); | ||
| delta = Math.max(neededHealthy, delta); | ||
| return delta; | ||
| } else { // delta == 0 | ||
| // We have exactly the number of healthy replicas needed. | ||
| return delta; | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Return true if the container is sufficiently replicated. Decommissioning | ||
| * and Decommissioned containers are ignored in this check, assuming they will | ||
| * eventually be removed from the cluster. | ||
| * This check ignores inflight additions, as those replicas have not yet been | ||
| * created and the create could fail for some reason. | ||
| * The check does consider inflight deletes as there may be 3 healthy replicas | ||
| * now, but once the delete completes it will reduce to 2. | ||
| * We also assume a replica in Maintenance state cannot be removed, so the | ||
| * pending delete would affect only the healthy replica count. | ||
| * | ||
| * @return True if the container is sufficiently replicated and False | ||
| * otherwise. | ||
| */ | ||
| @Override | ||
| public boolean isSufficientlyReplicated() { | ||
| return missingReplicas() + inFlightDel <= 0; | ||
| } | ||
|
|
||
| /** | ||
| * Return true is the container is over replicated. Decommission and | ||
| * maintenance containers are ignored for this check. | ||
| * The check ignores inflight additions, as they may fail, but it does | ||
| * consider inflight deletes, as they would reduce the over replication when | ||
| * they complete. | ||
| * | ||
| * @return True if the container is over replicated, false otherwise. | ||
| */ | ||
| @Override | ||
| public boolean isOverReplicated() { | ||
| return missingReplicas() + inFlightDel < 0; | ||
| } | ||
|
|
||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure about the name of the class. As the other one is called ECContainerReplicaCount, would this be better as RatisContainerReplicaCount, or ReplicatedContainerReplicaCount maybe?