Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -22,238 +22,24 @@

import java.util.Set;

import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;

/**
* Immutable object that is created with a set of ContainerReplica objects and
* the number of in flight replica add and deletes, the container replication
* factor and the min count which must be available for maintenance. This
* information can be used to determine if the container is over or under
* replicated and also how many additional replicas need to be created or removed.
* Common interface for EC and non-EC container replica counts.
* TODO pull up more methods if needed
*/
public class ContainerReplicaCount {

private int healthyCount = 0;
private int decommissionCount = 0;
private int maintenanceCount = 0;
private int inFlightAdd = 0;
private int inFlightDel = 0;
private int repFactor;
private int minHealthyForMaintenance;
private ContainerInfo container;
private Set<ContainerReplica> replica;

public ContainerReplicaCount(ContainerInfo container,
Set<ContainerReplica> replica, int inFlightAdd,
int inFlightDelete, int replicationFactor,
int minHealthyForMaintenance) {
this.healthyCount = 0;
this.decommissionCount = 0;
this.maintenanceCount = 0;
this.inFlightAdd = inFlightAdd;
this.inFlightDel = inFlightDelete;
this.repFactor = replicationFactor;
this.replica = replica;
this.minHealthyForMaintenance
= Math.min(this.repFactor, minHealthyForMaintenance);
this.container = container;

for (ContainerReplica cr : this.replica) {
HddsProtos.NodeOperationalState state =
cr.getDatanodeDetails().getPersistedOpState();
if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
decommissionCount++;
} else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
maintenanceCount++;
} else {
healthyCount++;
}
}
}

/**
 * Returns the number of replicas counted as healthy by the constructor, ie
 * those whose datanode is neither decommissioning / decommissioned nor
 * entering / in maintenance.
 *
 * @return the healthy replica count
 */
public int getHealthyCount() {
return healthyCount;
}
public interface ContainerReplicaCount {
/**
 * @return the container the replica counts relate to.
 */
ContainerInfo getContainer();

/**
 * Returns the number of replicas whose datanode was in the DECOMMISSIONING
 * or DECOMMISSIONED state when this object was constructed.
 *
 * @return the decommission replica count
 */
public int getDecommissionCount() {
return decommissionCount;
}
/**
 * @return the set of replicas the counts are based on.
 */
Set<ContainerReplica> getReplicas();

/**
 * Returns the number of replicas whose datanode was in the
 * ENTERING_MAINTENANCE or IN_MAINTENANCE state when this object was
 * constructed.
 *
 * @return the maintenance replica count
 */
public int getMaintenanceCount() {
return maintenanceCount;
}
/**
 * @return true if the container is sufficiently replicated, false otherwise.
 */
boolean isSufficientlyReplicated();

/**
 * Returns the replication factor supplied to the constructor.
 *
 * @return the replication factor
 */
public int getReplicationFactor() {
return repFactor;
}
/**
 * @return true if the container is over replicated, false otherwise.
 */
boolean isOverReplicated();

/**
 * Returns the container supplied to the constructor.
 *
 * @return the container these counts were calculated for
 */
public ContainerInfo getContainer() {
return container;
}
/**
 * NOTE(review): the unit of this count (replicas or EC replica indexes)
 * appears to depend on the implementation - confirm against implementers.
 *
 * @return the count of decommissioning replicas.
 */
int getDecommissionCount();

/**
 * Returns the replica set supplied to the constructor. The stored set is
 * returned directly rather than a copy.
 *
 * @return the replicas these counts were calculated from
 */
public Set<ContainerReplica> getReplica() {
return replica;
}

/**
 * Renders the container state, the total replica count and every counter
 * held by this object as a single line of "label: value" pairs.
 *
 * @return a human readable summary of the replica counts
 */
@Override
public String toString() {
  // NOTE(review): the " inFightDel Count: " label looks like a typo for
  // "inFlightDel"; it is kept as is because the text is emitted at runtime
  // and existing log matching or tests may depend on it.
  final StringBuilder summary = new StringBuilder();
  summary.append("Container State: ").append(container.getState());
  summary.append(" Replica Count: ").append(replica.size());
  summary.append(" Healthy Count: ").append(healthyCount);
  summary.append(" Decommission Count: ").append(decommissionCount);
  summary.append(" Maintenance Count: ").append(maintenanceCount);
  summary.append(" inFlightAdd Count: ").append(inFlightAdd);
  summary.append(" inFightDel Count: ").append(inFlightDel);
  summary.append(" ReplicationFactor: ").append(repFactor);
  summary.append(" minMaintenance Count: ").append(minHealthyForMaintenance);
  return summary.toString();
}

/**
 * Calculates the delta of replicas which need to be created or removed to
 * ensure the container is correctly replicated, taking inflight adds and
 * deletes into account.
 *
 * Inflight operations may fail. To cover the worst case and avoid data loss,
 * a delete is always assumed to succeed and an add is assumed to fail. In
 * this way, we avoid scheduling too many deletes which could result in data
 * loss.
 *
 * Decisions around over-replication are made only on healthy replicas,
 * ignoring any in maintenance and also any inflight adds. Inflight adds are
 * ignored, as they may not complete, so if we have:
 *
 * H, H, H, IN_FLIGHT_ADD
 *
 * And then schedule a delete, we could end up under-replicated (add fails,
 * delete completes). It is better to let the inflight operations complete
 * and then deal with any further over or under replication.
 *
 * For maintenance replicas, assuming replication factor 3, and minHealthy
 * 2, it is possible for all 3 hosts to be put into maintenance, leaving the
 * following (H = healthy, M = maintenance):
 *
 * H, H, M, M, M
 *
 * Even though we are tracking 5 replicas, this is not over replicated as we
 * ignore the maintenance copies. Later, the replicas could look like:
 *
 * H, H, H, H, M
 *
 * At this stage, the container is over replicated by 1, so one replica can
 * be removed.
 *
 * When the container is not over replicated, inflight adds are subtracted
 * from and inflight deletes are added to the number of missing replicas, so
 * that no more adds than needed are scheduled. The result is bounded below
 * at zero, so a surplus which exists only because of inflight adds is not
 * acted upon until those operations complete. The missing replica count
 * itself includes the logic which ensures minHealthyForMaintenance healthy
 * replicas are maintained.
 *
 * @return Delta of replicas needed. Negative indicates over replication and
 *         replicas should be removed. Positive indicates under replication
 *         and replicas should be added. Zero indicates no replicas need to
 *         be added or removed at this time.
 */
public int additionalReplicaNeeded() {
  final int missing = missingReplicas();

  if (missing >= 0) {
    // Under or perfectly replicated. Both inflight adds and deletes are
    // applied, but the result is never allowed to go negative, so pending
    // operations can finish before any over replication is handled.
    return Math.max(0, missing - inFlightAdd + inFlightDel);
  }

  // Over replicated. Inflight adds are ignored as they may fail, while
  // inflight deletes reduce the excess. With enough inflight deletes the
  // result can become positive, which asks for an additional replica.
  return missing + inFlightDel;
}

/**
 * Returns the count of replicas which need to be created or removed to
 * ensure the container is perfectly replicated. Inflight operations are not
 * considered here, but the logic to determine the missing or excess counts
 * for maintenance is present.
 *
 * Decisions around over-replication are made only on healthy replicas,
 * ignoring any in maintenance. For example, if we have:
 *
 * H, H, H, M, M
 *
 * This will not be considered over replicated until one of the maintenance
 * replicas moves to healthy.
 *
 * If the container is perfectly replicated, zero will be returned.
 *
 * If it is under replicated a positive value will be returned, indicating
 * how many replicas must be added.
 *
 * If it is over replicated a negative value will be returned, indicating
 * how many replicas to remove.
 *
 * @return Zero if the container is perfectly replicated, a positive value
 *         for under replicated and a negative value for over replicated.
 */
private int missingReplicas() {
  final int shortfall = repFactor - healthyCount;

  if (shortfall <= 0) {
    // Zero means exactly enough healthy replicas; negative means there are
    // too many, so a replica may need to be removed.
    return shortfall;
  }

  // Fewer healthy replicas than the replication factor. Maintenance replicas
  // offset the shortfall, but at least minHealthyForMaintenance healthy
  // replicas must still be present.
  final int afterMaintenance = Math.max(0, shortfall - maintenanceCount);
  final int neededForMinHealthy =
      Math.max(0, minHealthyForMaintenance - healthyCount);
  return Math.max(neededForMinHealthy, afterMaintenance);
}

/**
 * Return true if the container is sufficiently replicated. Replicas on
 * decommissioning and decommissioned nodes are ignored in this check,
 * assuming they will eventually be removed from the cluster.
 * This check ignores inflight additions, as those replicas have not yet been
 * created and the create could fail for some reason.
 * The check does consider inflight deletes, as there may be 3 healthy
 * replicas now, but once the delete completes it will reduce to 2.
 * We also assume a replica in maintenance state cannot be removed, so the
 * pending delete would affect only the healthy replica count.
 *
 * @return True if the container is sufficiently replicated and False
 *         otherwise.
 */
public boolean isSufficientlyReplicated() {
  // Every inflight delete is treated as though it has already removed a
  // healthy replica.
  final int deficitAfterDeletes = missingReplicas() + inFlightDel;
  return deficitAfterDeletes <= 0;
}

/**
 * Return true if the container is over replicated. Decommission and
 * maintenance replicas are ignored for this check.
 * The check ignores inflight additions, as they may fail, but it does
 * consider inflight deletes, as they would reduce the over replication when
 * they complete.
 *
 * @return True if the container is over replicated, false otherwise.
 */
public boolean isOverReplicated() {
  // Still over replicated only if an excess remains once every inflight
  // delete is assumed to have completed.
  final int excessAfterDeletes = missingReplicas() + inFlightDel;
  return excessAfterDeletes < 0;
}
/**
 * NOTE(review): the unit of this count (replicas or EC replica indexes)
 * appears to depend on the implementation - confirm against implementers.
 *
 * @return the count of replicas in maintenance.
 */
int getMaintenanceCount();

/**
* Returns true if the container is healthy, meaning all replica which are not
Expand All @@ -262,22 +48,21 @@ public boolean isOverReplicated() {
*
* @return true if the container is healthy, false otherwise
*/
public boolean isHealthy() {
return (container.getState() == HddsProtos.LifeCycleState.CLOSED
|| container.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED)
&& replica.stream()
default boolean isHealthy() {
HddsProtos.LifeCycleState containerState = getContainer().getState();
return (containerState == HddsProtos.LifeCycleState.CLOSED
|| containerState == HddsProtos.LifeCycleState.QUASI_CLOSED)
&& getReplicas().stream()
.filter(r -> r.getDatanodeDetails().getPersistedOpState() == IN_SERVICE)
.allMatch(r -> LegacyReplicationManager.compareState(
container.getState(), r.getState()));
containerState, r.getState()));

}

/**
* Returns true if there are no replicas of a container available, i.e. the
* set of container replica passed in the constructor has zero entries.
* Return true if there are insufficient replicas to recover this container.
*
* @return true if there are no replicas, false otherwise.
* @return true if there are insufficient replicas, false otherwise.
*/
public boolean isMissing() {
  // Missing means the replica set handed to the constructor has no entries.
  return replica.isEmpty();
}
boolean isUnrecoverable();
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
* * Maintenance copies are not considered until they are back to IN_SERVICE
*/

public class ECContainerReplicaCount {
public class ECContainerReplicaCount implements ContainerReplicaCount {

private final ContainerInfo containerInfo;
private final ECReplicationConfig repConfig;
Expand Down Expand Up @@ -128,10 +128,26 @@ public ECContainerReplicaCount(ContainerInfo containerInfo,
}
}

/**
 * Returns the {@link ContainerInfo} held by this replica count.
 *
 * @return the containerInfo field of this instance
 */
@Override
public ContainerInfo getContainer() {
return containerInfo;
}

/**
 * Returns the replicas held by this replica count. The field is returned
 * directly; no copy is made by this method.
 *
 * @return the replicas field of this instance
 */
@Override
public Set<ContainerReplica> getReplicas() {
return replicas;
}

/**
 * Returns the number of entries in decommissionIndexes. As that collection
 * is keyed by replica index, this is presumably the number of distinct
 * indexes with a decommissioning replica rather than the number of
 * replicas - TODO confirm.
 *
 * @return the size of decommissionIndexes
 */
@Override
public int getDecommissionCount() {
return decommissionIndexes.size();
}

/**
 * Returns the number of entries in maintenanceIndexes. Presumably this is
 * the number of distinct replica indexes in maintenance rather than the
 * number of replicas - TODO confirm.
 *
 * @return the size of maintenanceIndexes
 */
@Override
public int getMaintenanceCount() {
return maintenanceIndexes.size();
}

/**
* Get a set containing all decommissioning indexes, or an empty set if none
* are decommissioning. Note it is possible for an index to be
Expand Down Expand Up @@ -184,7 +200,8 @@ public Set<Integer> maintenanceIndexes() {
* Ie, less than EC Datanum containers are present.
* @return True if the container cannot be recovered, false otherwise.
*/
public boolean unRecoverable() {
@Override
public boolean isUnrecoverable() {
Set<Integer> distinct = new HashSet<>();
distinct.addAll(healthyIndexes.keySet());
distinct.addAll(decommissionIndexes.keySet());
Expand Down Expand Up @@ -292,6 +309,11 @@ public boolean isOverReplicated(boolean includePendingDelete) {
return false;
}

/**
 * Over replication check which does not include pending deletes. Delegates
 * to {@link #isOverReplicated(boolean)} with includePendingDelete set to
 * false.
 *
 * @return the result of isOverReplicated(false)
 */
@Override
public boolean isOverReplicated() {
return isOverReplicated(false);
}

/**
* Return an unsorted list of any replica indexes which have more than one
* replica and are therefore over-replicated. Maintenance replicas are ignored
Expand Down Expand Up @@ -365,6 +387,11 @@ public boolean isSufficientlyReplicated(boolean includePendingAdd) {
>= repConfig.getData() + remainingMaintenanceRedundancy;
}

/**
 * Sufficient replication check which does not include pending adds.
 * Delegates to {@link #isSufficientlyReplicated(boolean)} with
 * includePendingAdd set to false.
 *
 * @return the result of isSufficientlyReplicated(false)
 */
@Override
public boolean isSufficientlyReplicated() {
return isSufficientlyReplicated(false);
}

/**
* Check if there is an entry in the map for all expected replica indexes,
* and also that the count against each index is greater than zero.
Expand Down
Loading