2222import org .apache .hadoop .hdds .protocol .proto .StorageContainerDatanodeProtocolProtos .ContainerReplicaProto ;
2323import org .apache .hadoop .hdds .scm .container .ContainerInfo ;
2424import org .apache .hadoop .hdds .scm .container .ContainerReplica ;
25+ import org .apache .hadoop .hdds .scm .container .replication .ContainerHealthResult .OverReplicatedHealthResult ;
26+ import org .apache .hadoop .hdds .scm .container .replication .ContainerHealthResult .UnderReplicatedHealthResult ;
2527
2628import java .util .ArrayList ;
2729import java .util .Comparator ;
3436import static org .apache .hadoop .hdds .protocol .proto .HddsProtos .NodeOperationalState .DECOMMISSIONING ;
3537import static org .apache .hadoop .hdds .protocol .proto .HddsProtos .NodeOperationalState .ENTERING_MAINTENANCE ;
3638import static org .apache .hadoop .hdds .protocol .proto .HddsProtos .NodeOperationalState .IN_MAINTENANCE ;
39+ import static org .apache .hadoop .hdds .scm .container .replication .ReplicationManager .compareState ;
3740
3841/**
3942 * Immutable object that is created with a set of ContainerReplica objects and
@@ -50,6 +53,8 @@ public class RatisContainerReplicaCount implements ContainerReplicaCount {
5053 private int matchingReplicaCount ;
5154 private int decommissionCount ;
5255 private int maintenanceCount ;
56+ private int unhealthyDecommissionCount ;
57+ private int unhealthyMaintenanceCount ;
5358 private int inFlightAdd ;
5459 private int inFlightDel ;
5560 private final int repFactor ;
@@ -63,12 +68,6 @@ public RatisContainerReplicaCount(ContainerInfo container,
6368 int inFlightAdd ,
6469 int inFlightDelete , int replicationFactor ,
6570 int minHealthyForMaintenance ) {
66- this .unhealthyReplicaCount = 0 ;
67- this .healthyReplicaCount = 0 ;
68- this .misMatchedReplicaCount = 0 ;
69- this .matchingReplicaCount = 0 ;
70- this .decommissionCount = 0 ;
71- this .maintenanceCount = 0 ;
7271 this .inFlightAdd = inFlightAdd ;
7372 this .inFlightDel = inFlightDelete ;
7473 this .repFactor = replicationFactor ;
@@ -83,29 +82,7 @@ public RatisContainerReplicaCount(ContainerInfo container,
8382 = Math .min (this .repFactor , minHealthyForMaintenance );
8483 this .container = container ;
8584
86- for (ContainerReplica cr : this .replicas ) {
87- HddsProtos .NodeOperationalState state =
88- cr .getDatanodeDetails ().getPersistedOpState ();
89- if (state == DECOMMISSIONED || state == DECOMMISSIONING ) {
90- decommissionCount ++;
91- } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE ) {
92- maintenanceCount ++;
93- } else if (cr .getState () == ContainerReplicaProto .State .UNHEALTHY ) {
94- unhealthyReplicaCount ++;
95- } else if (!ReplicationManager .compareState (container .getState (),
96- cr .getState ())) {
97- /*
98- Replica's state is not UNHEALTHY, but it doesn't match the
99- container's state. For example, a CLOSING replica of a CLOSED container.
100- */
101- healthyReplicaCount ++;
102- misMatchedReplicaCount ++;
103- } else {
104- // replica's state exactly matches container's state
105- healthyReplicaCount ++;
106- matchingReplicaCount ++;
107- }
108- }
85+ countReplicas ();
10986 }
11087
11188 public RatisContainerReplicaCount (ContainerInfo containerInfo ,
@@ -126,15 +103,6 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
126103 = Math .min (this .repFactor , minHealthyForMaintenance );
127104 this .considerUnhealthy = considerUnhealthy ;
128105
129- this .unhealthyReplicaCount = 0 ;
130- this .healthyReplicaCount = 0 ;
131- this .misMatchedReplicaCount = 0 ;
132- this .matchingReplicaCount = 0 ;
133- this .decommissionCount = 0 ;
134- this .maintenanceCount = 0 ;
135- this .inFlightAdd = 0 ;
136- this .inFlightDel = 0 ;
137-
138106 // collect DNs that have UNHEALTHY replicas
139107 Set <DatanodeDetails > unhealthyReplicaDNs = new HashSet <>();
140108 for (ContainerReplica r : replicas ) {
@@ -162,27 +130,37 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
162130 }
163131 }
164132
165- for (ContainerReplica cr : this .replicas ) {
133+ countReplicas ();
134+ }
135+
136+ private void countReplicas () {
137+ for (ContainerReplica cr : replicas ) {
166138 HddsProtos .NodeOperationalState state =
167139 cr .getDatanodeDetails ().getPersistedOpState ();
140+ boolean unhealthy =
141+ cr .getState () == ContainerReplicaProto .State .UNHEALTHY ;
142+
168143 if (state == DECOMMISSIONED || state == DECOMMISSIONING ) {
169- decommissionCount ++;
144+ if (unhealthy ) {
145+ unhealthyDecommissionCount ++;
146+ } else {
147+ decommissionCount ++;
148+ }
170149 } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE ) {
171- maintenanceCount ++;
172- } else if (cr .getState () == ContainerReplicaProto .State .UNHEALTHY ) {
150+ if (unhealthy ) {
151+ unhealthyMaintenanceCount ++;
152+ } else {
153+ maintenanceCount ++;
154+ }
155+ } else if (unhealthy ) {
173156 unhealthyReplicaCount ++;
174- } else if (!ReplicationManager .compareState (container .getState (),
175- cr .getState ())) {
176- /*
177- Replica's state is not UNHEALTHY, but it doesn't match the
178- container's state. For example, a CLOSING replica of a CLOSED container.
179- */
180- healthyReplicaCount ++;
181- misMatchedReplicaCount ++;
182157 } else {
183- // replica's state exactly matches container's state
184158 healthyReplicaCount ++;
185- matchingReplicaCount ++;
159+ if (compareState (container .getState (), cr .getState ())) {
160+ matchingReplicaCount ++;
161+ } else {
162+ misMatchedReplicaCount ++;
163+ }
186164 }
187165 }
188166 }
@@ -200,11 +178,13 @@ public RatisContainerReplicaCount(ContainerInfo containerInfo,
200178 * Total healthy replicas = 3 = 1 matching + 2 mismatched replicas
201179 */
202180 public int getHealthyReplicaCount () {
203- return healthyReplicaCount + healthyReplicaCountAdapter ();
181+ return healthyReplicaCount + healthyReplicaCountAdapter ()
182+ + decommissionCount + maintenanceCount ;
204183 }
205184
206185 public int getUnhealthyReplicaCount () {
207- return unhealthyReplicaCount + getUnhealthyReplicaCountAdapter ();
186+ return unhealthyReplicaCount + getUnhealthyReplicaCountAdapter ()
187+ + unhealthyDecommissionCount + unhealthyMaintenanceCount ;
208188 }
209189
210190 protected int getUnhealthyReplicaCountAdapter () {
@@ -220,11 +200,11 @@ public int getMatchingReplicaCount() {
220200 }
221201
222202 private int getAvailableReplicas () {
203+ int available = healthyReplicaCount + healthyReplicaCountAdapter ();
223204 if (considerUnhealthy ) {
224- return getHealthyReplicaCount () + getUnhealthyReplicaCount ();
225- } else {
226- return getHealthyReplicaCount ();
205+ available += unhealthyReplicaCount + getUnhealthyReplicaCountAdapter ();
227206 }
207+ return available ;
228208 }
229209
230210 /**
@@ -241,12 +221,16 @@ protected int healthyReplicaCountAdapter() {
241221
242222 @ Override
243223 public int getDecommissionCount () {
244- return decommissionCount ;
224+ return considerUnhealthy
225+ ? decommissionCount + unhealthyDecommissionCount
226+ : decommissionCount ;
245227 }
246228
247229 @ Override
248230 public int getMaintenanceCount () {
249- return maintenanceCount ;
231+ return considerUnhealthy
232+ ? maintenanceCount + unhealthyMaintenanceCount
233+ : maintenanceCount ;
250234 }
251235
252236 public int getReplicationFactor () {
@@ -265,16 +249,20 @@ public List<ContainerReplica> getReplicas() {
265249
266250 @ Override
267251 public String toString () {
268- return "Container State: " + container .getState () +
252+ String result = "Container State: " + container .getState () +
269253 " Replica Count: " + replicas .size () +
270- " Healthy Count : " + healthyReplicaCount +
271- " Unhealthy Count: " + unhealthyReplicaCount +
272- " Decommission Count : " + decommissionCount +
273- " Maintenance Count: " + maintenanceCount +
274- " inFlightAdd Count : " + inFlightAdd +
275- " inFightDel Count : " + inFlightDel +
254+ " Healthy (I/D/M) : " + healthyReplicaCount +
255+ "/" + decommissionCount + "/" + maintenanceCount +
256+ " Unhealthy (I/D/M) : " + unhealthyReplicaCount +
257+ "/" + unhealthyDecommissionCount + "/" + unhealthyMaintenanceCount +
258+ " inFlightAdd: " + inFlightAdd +
259+ " inFightDel: " + inFlightDel +
276260 " ReplicationFactor: " + repFactor +
277- " minMaintenance Count: " + minHealthyForMaintenance ;
261+ " minMaintenance: " + minHealthyForMaintenance ;
262+ if (considerUnhealthy ) {
263+ result += " +considerUnhealthy" ;
264+ }
265+ return result ;
278266 }
279267
280268 /**
@@ -378,7 +366,7 @@ private int missingReplicas() {
378366 return delta ;
379367 } else if (delta > 0 ) {
380368 // May be under-replicated, depending on maintenance.
381- delta = Math .max (0 , delta - maintenanceCount );
369+ delta = Math .max (0 , delta - getMaintenanceCount () );
382370 int neededHealthy =
383371 Math .max (0 , minHealthyForMaintenance - getAvailableReplicas ());
384372 delta = Math .max (neededHealthy , delta );
@@ -520,16 +508,13 @@ private int redundancyDelta(boolean includePendingDelete,
520508 /**
521509 * Checks whether insufficient replication is because of some replicas
522510 * being on datanodes that were decommissioned.
523- * @param includePendingAdd if pending adds should be considered
511+ *
524512 * @return true if there is insufficient replication and it's because of
525513 * decommissioning.
526514 */
527- public boolean inSufficientDueToDecommission (boolean includePendingAdd ) {
528- if (isSufficientlyReplicated (includePendingAdd )) {
529- return false ;
530- }
531- int delta = redundancyDelta (true , includePendingAdd );
532- return decommissionCount >= delta ;
515+ private boolean inSufficientDueToDecommission () {
516+ int delta = redundancyDelta (true , false );
517+ return 0 < delta && delta <= getDecommissionCount ();
533518 }
534519
535520 /**
@@ -542,9 +527,10 @@ public boolean inSufficientDueToDecommission(boolean includePendingAdd) {
542527 * @return Count of remaining redundant replicas.
543528 */
544529 public int getRemainingRedundancy () {
545- return Math .max (0 ,
546- getAvailableReplicas () + decommissionCount + maintenanceCount
547- - inFlightDel - 1 );
530+ int availableReplicas = getAvailableReplicas ()
531+ + getDecommissionCount () + getMaintenanceCount ();
532+
533+ return Math .max (0 , availableReplicas - inFlightDel - 1 );
548534 }
549535
550536 /**
@@ -557,4 +543,28 @@ public int getRemainingRedundancy() {
557543 public boolean isUnrecoverable () {
558544 return getReplicas ().isEmpty ();
559545 }
546+
547+ public UnderReplicatedHealthResult toUnderHealthResult () {
548+ UnderReplicatedHealthResult result = new UnderReplicatedHealthResult (
549+ getContainer (),
550+ getRemainingRedundancy (),
551+ inSufficientDueToDecommission (),
552+ isSufficientlyReplicated (true ),
553+ isUnrecoverable ());
554+ result .setHasHealthyReplicas (getHealthyReplicaCount () > 0 );
555+ return result ;
556+ }
557+
558+ public OverReplicatedHealthResult toOverHealthResult () {
559+ OverReplicatedHealthResult result = new OverReplicatedHealthResult (
560+ getContainer (),
561+ getExcessRedundancy (false ),
562+ !isOverReplicated (true ));
563+ result .setHasMismatchedReplicas (getMisMatchedReplicaCount () > 0 );
564+ // FIXME not used in RatisReplicationCheckHandler: OK?
565+ result .setIsSafelyOverReplicated (isSafelyOverReplicated ());
566+ return result ;
567+
568+ }
569+
560570}
0 commit comments