Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.OverReplicatedHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.UnderReplicatedHealthResult;

import java.util.ArrayList;
import java.util.Comparator;
Expand All @@ -34,6 +36,7 @@
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static org.apache.hadoop.hdds.scm.container.replication.ReplicationManager.compareState;

/**
* Immutable object that is created with a set of ContainerReplica objects and
Expand All @@ -50,6 +53,8 @@ public class RatisContainerReplicaCount implements ContainerReplicaCount {
private int matchingReplicaCount;
private int decommissionCount;
private int maintenanceCount;
private int unhealthyDecommissionCount;
private int unhealthyMaintenanceCount;
private int inFlightAdd;
private int inFlightDel;
private final int repFactor;
Expand All @@ -63,12 +68,6 @@ public RatisContainerReplicaCount(ContainerInfo container,
int inFlightAdd,
int inFlightDelete, int replicationFactor,
int minHealthyForMaintenance) {
this.unhealthyReplicaCount = 0;
this.healthyReplicaCount = 0;
this.misMatchedReplicaCount = 0;
this.matchingReplicaCount = 0;
this.decommissionCount = 0;
this.maintenanceCount = 0;
this.inFlightAdd = inFlightAdd;
this.inFlightDel = inFlightDelete;
this.repFactor = replicationFactor;
Expand All @@ -83,29 +82,7 @@ public RatisContainerReplicaCount(ContainerInfo container,
= Math.min(this.repFactor, minHealthyForMaintenance);
this.container = container;

for (ContainerReplica cr : this.replicas) {
HddsProtos.NodeOperationalState state =
cr.getDatanodeDetails().getPersistedOpState();
if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
decommissionCount++;
} else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
maintenanceCount++;
} else if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) {
unhealthyReplicaCount++;
} else if (!ReplicationManager.compareState(container.getState(),
cr.getState())) {
/*
Replica's state is not UNHEALTHY, but it doesn't match the
container's state. For example, a CLOSING replica of a CLOSED container.
*/
healthyReplicaCount++;
misMatchedReplicaCount++;
} else {
// replica's state exactly matches container's state
healthyReplicaCount++;
matchingReplicaCount++;
}
}
countReplicas();
}

public RatisContainerReplicaCount(ContainerInfo containerInfo,
Expand All @@ -126,15 +103,6 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
= Math.min(this.repFactor, minHealthyForMaintenance);
this.considerUnhealthy = considerUnhealthy;

this.unhealthyReplicaCount = 0;
this.healthyReplicaCount = 0;
this.misMatchedReplicaCount = 0;
this.matchingReplicaCount = 0;
this.decommissionCount = 0;
this.maintenanceCount = 0;
this.inFlightAdd = 0;
this.inFlightDel = 0;

// collect DNs that have UNHEALTHY replicas
Set<DatanodeDetails> unhealthyReplicaDNs = new HashSet<>();
for (ContainerReplica r : replicas) {
Expand Down Expand Up @@ -162,27 +130,37 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
}
}

for (ContainerReplica cr : this.replicas) {
countReplicas();
}

private void countReplicas() {
for (ContainerReplica cr : replicas) {
HddsProtos.NodeOperationalState state =
cr.getDatanodeDetails().getPersistedOpState();
boolean unhealthy =
cr.getState() == ContainerReplicaProto.State.UNHEALTHY;

if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
decommissionCount++;
if (unhealthy) {
unhealthyDecommissionCount++;
} else {
decommissionCount++;
}
} else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
maintenanceCount++;
} else if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) {
if (unhealthy) {
unhealthyMaintenanceCount++;
} else {
maintenanceCount++;
}
} else if (unhealthy) {
unhealthyReplicaCount++;
} else if (!ReplicationManager.compareState(container.getState(),
cr.getState())) {
/*
Replica's state is not UNHEALTHY, but it doesn't match the
container's state. For example, a CLOSING replica of a CLOSED container.
*/
healthyReplicaCount++;
misMatchedReplicaCount++;
} else {
// replica's state exactly matches container's state
healthyReplicaCount++;
matchingReplicaCount++;
if (compareState(container.getState(), cr.getState())) {
matchingReplicaCount++;
} else {
misMatchedReplicaCount++;
}
}
}
}
Expand All @@ -200,11 +178,13 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
* Total healthy replicas = 3 = 1 matching + 2 mismatched replicas
*/
public int getHealthyReplicaCount() {
return healthyReplicaCount + healthyReplicaCountAdapter();
return healthyReplicaCount + healthyReplicaCountAdapter()
+ decommissionCount + maintenanceCount;
}

public int getUnhealthyReplicaCount() {
return unhealthyReplicaCount + getUnhealthyReplicaCountAdapter();
return unhealthyReplicaCount + getUnhealthyReplicaCountAdapter()
+ unhealthyDecommissionCount + unhealthyMaintenanceCount;
}

protected int getUnhealthyReplicaCountAdapter() {
Expand All @@ -220,11 +200,11 @@ public int getMatchingReplicaCount() {
}

private int getAvailableReplicas() {
int available = healthyReplicaCount + healthyReplicaCountAdapter();
if (considerUnhealthy) {
return getHealthyReplicaCount() + getUnhealthyReplicaCount();
} else {
return getHealthyReplicaCount();
available += unhealthyReplicaCount + getUnhealthyReplicaCountAdapter();
}
return available;
}

/**
Expand All @@ -241,12 +221,16 @@ protected int healthyReplicaCountAdapter() {

@Override
public int getDecommissionCount() {
return decommissionCount;
return considerUnhealthy
? decommissionCount + unhealthyDecommissionCount
: decommissionCount;
}

@Override
public int getMaintenanceCount() {
return maintenanceCount;
return considerUnhealthy
? maintenanceCount + unhealthyMaintenanceCount
: maintenanceCount;
}

public int getReplicationFactor() {
Expand All @@ -265,16 +249,20 @@ public List<ContainerReplica> getReplicas() {

@Override
public String toString() {
return "Container State: " + container.getState() +
String result = "Container State: " + container.getState() +
" Replica Count: " + replicas.size() +
" Healthy Count: " + healthyReplicaCount +
" Unhealthy Count: " + unhealthyReplicaCount +
" Decommission Count: " + decommissionCount +
" Maintenance Count: " + maintenanceCount +
" inFlightAdd Count: " + inFlightAdd +
" inFightDel Count: " + inFlightDel +
" Healthy (I/D/M): " + healthyReplicaCount +
"/" + decommissionCount + "/" + maintenanceCount +
" Unhealthy (I/D/M): " + unhealthyReplicaCount +
"/" + unhealthyDecommissionCount + "/" + unhealthyMaintenanceCount +
" inFlightAdd: " + inFlightAdd +
" inFightDel: " + inFlightDel +
" ReplicationFactor: " + repFactor +
" minMaintenance Count: " + minHealthyForMaintenance;
" minMaintenance: " + minHealthyForMaintenance;
if (considerUnhealthy) {
result += " +considerUnhealthy";
}
return result;
}

/**
Expand Down Expand Up @@ -378,7 +366,7 @@ private int missingReplicas() {
return delta;
} else if (delta > 0) {
// May be under-replicated, depending on maintenance.
delta = Math.max(0, delta - maintenanceCount);
delta = Math.max(0, delta - getMaintenanceCount());
int neededHealthy =
Math.max(0, minHealthyForMaintenance - getAvailableReplicas());
delta = Math.max(neededHealthy, delta);
Expand Down Expand Up @@ -520,16 +508,13 @@ private int redundancyDelta(boolean includePendingDelete,
/**
* Checks whether insufficient replication is because of some replicas
* being on datanodes that were decommissioned.
* @param includePendingAdd if pending adds should be considered
*
* @return true if there is insufficient replication and it's because of
* decommissioning.
*/
public boolean inSufficientDueToDecommission(boolean includePendingAdd) {
if (isSufficientlyReplicated(includePendingAdd)) {
return false;
}
int delta = redundancyDelta(true, includePendingAdd);
return decommissionCount >= delta;
private boolean inSufficientDueToDecommission() {
int delta = redundancyDelta(true, false);
return 0 < delta && delta <= getDecommissionCount();
}

/**
Expand All @@ -542,9 +527,10 @@ public boolean inSufficientDueToDecommission(boolean includePendingAdd) {
* @return Count of remaining redundant replicas.
*/
public int getRemainingRedundancy() {
return Math.max(0,
getAvailableReplicas() + decommissionCount + maintenanceCount
- inFlightDel - 1);
int availableReplicas = getAvailableReplicas()
+ getDecommissionCount() + getMaintenanceCount();

return Math.max(0, availableReplicas - inFlightDel - 1);
}

/**
Expand All @@ -557,4 +543,28 @@ public int getRemainingRedundancy() {
public boolean isUnrecoverable() {
return getReplicas().isEmpty();
}

/**
 * Builds an {@link UnderReplicatedHealthResult} describing this replica
 * count.
 *
 * <p>The result is constructed from the container, the remaining redundancy,
 * whether the shortfall is attributed to decommissioning, the outcome of
 * {@code isSufficientlyReplicated(true)} and whether the container is
 * unrecoverable. Its "has healthy replicas" flag is set when
 * {@link #getHealthyReplicaCount()} is greater than zero.
 *
 * @return the populated under-replication health result
 */
public UnderReplicatedHealthResult toUnderHealthResult() {
  final UnderReplicatedHealthResult underHealth =
      new UnderReplicatedHealthResult(getContainer(), getRemainingRedundancy(),
          inSufficientDueToDecommission(), isSufficientlyReplicated(true),
          isUnrecoverable());

  final boolean anyHealthy = getHealthyReplicaCount() > 0;
  underHealth.setHasHealthyReplicas(anyHealthy);
  return underHealth;
}

/**
 * Builds an {@link OverReplicatedHealthResult} describing this replica
 * count.
 *
 * <p>The result is constructed from the container, the value of
 * {@code getExcessRedundancy(false)} and the negation of
 * {@code isOverReplicated(true)}. Afterwards the "has mismatched replicas"
 * flag is set when {@link #getMisMatchedReplicaCount()} is greater than zero,
 * and the "safely over replicated" flag is copied from
 * {@link #isSafelyOverReplicated()}.
 *
 * @return the populated over-replication health result
 */
public OverReplicatedHealthResult toOverHealthResult() {
  final OverReplicatedHealthResult overHealth =
      new OverReplicatedHealthResult(getContainer(),
          getExcessRedundancy(false), !isOverReplicated(true));

  final boolean anyMismatched = getMisMatchedReplicaCount() > 0;
  overHealth.setHasMismatchedReplicas(anyMismatched);

  // FIXME (open question): RatisReplicationCheckHandler did not set this
  // flag on its result, so confirm that setting it for every caller is OK.
  final boolean safelyOver = isSafelyOverReplicated();
  overHealth.setIsSafelyOverReplicated(safelyOver);
  return overHealth;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,7 @@ public ContainerHealthResult checkHealth(ContainerCheckRequest request) {
boolean sufficientlyReplicated
= replicaCount.isSufficientlyReplicated(false);
if (!sufficientlyReplicated) {
ContainerHealthResult.UnderReplicatedHealthResult result =
new ContainerHealthResult.UnderReplicatedHealthResult(
container, replicaCount.getRemainingRedundancy(),
replicaCount.inSufficientDueToDecommission(false),
replicaCount.isSufficientlyReplicated(true),
replicaCount.isUnrecoverable());
result.setHasHealthyReplicas(replicaCount.getHealthyReplicaCount() > 0);
return result;
return replicaCount.toUnderHealthResult();
}

/*
Expand All @@ -209,14 +202,7 @@ of UNHEALTHY replicas (such as 3 CLOSED and 1 UNHEALTHY replicas of a
minReplicasForMaintenance, true);
boolean isOverReplicated = consideringUnhealthy.isOverReplicated(false);
if (isOverReplicated) {
boolean repOkWithPending = !consideringUnhealthy.isOverReplicated(true);
ContainerHealthResult.OverReplicatedHealthResult result =
new ContainerHealthResult.OverReplicatedHealthResult(
container, consideringUnhealthy.getExcessRedundancy(false),
repOkWithPending);
result.setHasMismatchedReplicas(
consideringUnhealthy.getMisMatchedReplicaCount() > 0);
return result;
return consideringUnhealthy.toOverHealthResult();
}

int requiredNodes = container.getReplicationConfig().getRequiredNodes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,7 @@ public boolean handle(ContainerCheckRequest request) {
}

// Now, consider UNHEALTHY replicas when calculating replication status
RatisContainerReplicaCount replicaCount =
new RatisContainerReplicaCount(container,
request.getContainerReplicas(), request.getPendingOps(),
request.getMaintenanceRedundancy(), true);
RatisContainerReplicaCount replicaCount = getReplicaCount(request);
if (replicaCount.getUnhealthyReplicaCount() == 0) {
LOG.debug("No UNHEALTHY replicas are present for container {} with " +
"replicas [{}].", container, request.getContainerReplicas());
Expand All @@ -80,7 +77,7 @@ public boolean handle(ContainerCheckRequest request) {
container.containerID());
}

ContainerHealthResult health = checkReplication(request);
ContainerHealthResult health = checkReplication(replicaCount);
if (health.getHealthState()
== ContainerHealthResult.HealthState.UNDER_REPLICATED) {
ContainerHealthResult.UnderReplicatedHealthResult underHealth
Expand Down Expand Up @@ -155,39 +152,29 @@ private boolean verifyPerfectReplication(ContainerCheckRequest request) {
* Checks if the container is over or under replicated.
*/
@VisibleForTesting
protected ContainerHealthResult checkReplication(
ContainerHealthResult checkReplication(ContainerCheckRequest request) {
return checkReplication(getReplicaCount(request));
}

private static RatisContainerReplicaCount getReplicaCount(
ContainerCheckRequest request) {
RatisContainerReplicaCount replicaCount =
new RatisContainerReplicaCount(request.getContainerInfo(),
request.getContainerReplicas(), request.getPendingOps(),
request.getMaintenanceRedundancy(), true);
return new RatisContainerReplicaCount(request.getContainerInfo(),
request.getContainerReplicas(), request.getPendingOps(),
request.getMaintenanceRedundancy(), true);
}

private ContainerHealthResult checkReplication(
RatisContainerReplicaCount replicaCount) {

boolean sufficientlyReplicated
= replicaCount.isSufficientlyReplicated(false);
if (!sufficientlyReplicated) {
ContainerHealthResult.UnderReplicatedHealthResult result =
new ContainerHealthResult.UnderReplicatedHealthResult(
replicaCount.getContainer(),
replicaCount.getRemainingRedundancy(),
replicaCount.inSufficientDueToDecommission(false),
replicaCount.isSufficientlyReplicated(true),
replicaCount.isUnrecoverable());
result.setHasHealthyReplicas(replicaCount.getHealthyReplicaCount() > 0);
return result;
return replicaCount.toUnderHealthResult();
}

boolean isOverReplicated = replicaCount.isOverReplicated(false);
if (isOverReplicated) {
boolean repOkWithPending = !replicaCount.isOverReplicated(true);
ContainerHealthResult.OverReplicatedHealthResult result =
new ContainerHealthResult.OverReplicatedHealthResult(
replicaCount.getContainer(),
replicaCount.getExcessRedundancy(false),
repOkWithPending);
result.setHasMismatchedReplicas(
replicaCount.getMisMatchedReplicaCount() > 0);
result.setIsSafelyOverReplicated(replicaCount.isSafelyOverReplicated());
return result;
return replicaCount.toOverHealthResult();
}

return new ContainerHealthResult.UnHealthyResult(
Expand Down
Loading