diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java
index b632b1708a0c..0cebcb10ef2c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReportHandler.java
@@ -47,14 +47,6 @@ public class ContainerReportHandler extends AbstractContainerReportHandler
   private static final Logger LOG =
       LoggerFactory.getLogger(ContainerReportHandler.class);
 
-  enum UnknownContainerAction {
-    WARN, DELETE;
-
-    static UnknownContainerAction parse(String s) {
-      return s.equals(DELETE.name()) ? DELETE : WARN;
-    }
-  }
-
   private final UnknownContainerAction unknownContainerHandleAction;
 
   /**
@@ -273,4 +265,12 @@ private void processMissingReplicas(final DatanodeDetails datanodeDetails,
       }
     }
   }
+
+  enum UnknownContainerAction {
+    WARN, DELETE;
+
+    static UnknownContainerAction parse(String s) {
+      return s.equals(DELETE.name()) ? DELETE : WARN;
+    }
+  }
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java
index 790461101d3e..49a10058866f 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/MoveManager.java
@@ -53,55 +53,6 @@ public final class MoveManager implements
     ContainerReplicaPendingOpsSubscriber {
 
-  /**
-   * Various move return results.
-   */
-  public enum MoveResult {
-    // both replication and deletion are completed
-    COMPLETED,
-    // RM is not ratis leader
-    FAIL_LEADER_NOT_READY,
-    // replication fail because the container does not exist in src
-    REPLICATION_FAIL_NOT_EXIST_IN_SOURCE,
-    // replication fail because the container exists in target
-    REPLICATION_FAIL_EXIST_IN_TARGET,
-    // replication fail because the container is not cloesed
-    REPLICATION_FAIL_CONTAINER_NOT_CLOSED,
-    // replication fail because the container is in inflightDeletion
-    REPLICATION_FAIL_INFLIGHT_DELETION,
-    // replication fail because the container is in inflightReplication
-    REPLICATION_FAIL_INFLIGHT_REPLICATION,
-    // replication fail because of timeout
-    REPLICATION_FAIL_TIME_OUT,
-    // replication fail because of node is not in service
-    REPLICATION_FAIL_NODE_NOT_IN_SERVICE,
-    // replication fail because node is unhealthy
-    REPLICATION_FAIL_NODE_UNHEALTHY,
-    // replication succeed, but deletion fail because of timeout
-    DELETION_FAIL_TIME_OUT,
-    // deletion fail because of node is not in service
-    DELETION_FAIL_NODE_NOT_IN_SERVICE,
-    // replication succeed, but deletion fail because because
-    // node is unhealthy
-    DELETION_FAIL_NODE_UNHEALTHY,
-    // replication succeed, but if we delete the container from
-    // the source datanode , the policy(eg, replica num or
-    // rack location) will not be satisfied, so we should not delete
-    // the container
-    DELETE_FAIL_POLICY,
-    /*
-    Container is not healthy if it has issues such as under, over, or mis
-    replication. We don't try to move replicas of such containers.
-     */
-    REPLICATION_NOT_HEALTHY_BEFORE_MOVE,
-    // replicas + target - src does not satisfy placement policy
-    REPLICATION_NOT_HEALTHY_AFTER_MOVE,
-    // A move is already scheduled for this container
-    FAIL_CONTAINER_ALREADY_BEING_MOVED,
-    // Unexpected error
-    FAIL_UNEXPECTED_ERROR
-  }
-
   private static final Logger LOG =
       LoggerFactory.getLogger(MoveManager.class);
 
@@ -502,4 +453,53 @@ void setMoveTimeout(long moveTimeout) {
   void setReplicationTimeout(long replicationTimeout) {
     this.replicationTimeout = replicationTimeout;
   }
+
+  /**
+   * Various move return results.
+   */
+  public enum MoveResult {
+    // both replication and deletion are completed
+    COMPLETED,
+    // RM is not ratis leader
+    FAIL_LEADER_NOT_READY,
+    // replication fail because the container does not exist in src
+    REPLICATION_FAIL_NOT_EXIST_IN_SOURCE,
+    // replication fail because the container exists in target
+    REPLICATION_FAIL_EXIST_IN_TARGET,
+    // replication fail because the container is not closed
+    REPLICATION_FAIL_CONTAINER_NOT_CLOSED,
+    // replication fail because the container is in inflightDeletion
+    REPLICATION_FAIL_INFLIGHT_DELETION,
+    // replication fail because the container is in inflightReplication
+    REPLICATION_FAIL_INFLIGHT_REPLICATION,
+    // replication fail because of timeout
+    REPLICATION_FAIL_TIME_OUT,
+    // replication fail because of node is not in service
+    REPLICATION_FAIL_NODE_NOT_IN_SERVICE,
+    // replication fail because node is unhealthy
+    REPLICATION_FAIL_NODE_UNHEALTHY,
+    // replication succeed, but deletion fail because of timeout
+    DELETION_FAIL_TIME_OUT,
+    // deletion fail because of node is not in service
+    DELETION_FAIL_NODE_NOT_IN_SERVICE,
+    // replication succeed, but deletion fail because
+    // node is unhealthy
+    DELETION_FAIL_NODE_UNHEALTHY,
+    // replication succeed, but if we delete the container from
+    // the source datanode, the policy (e.g., replica num or
+    // rack location) will not be satisfied, so we should not delete
+    // the container
+    DELETE_FAIL_POLICY,
+    /*
+    Container is not healthy if it has issues such as under, over, or mis
+    replication. We don't try to move replicas of such containers.
+     */
+    REPLICATION_NOT_HEALTHY_BEFORE_MOVE,
+    // replicas + target - src does not satisfy placement policy
+    REPLICATION_NOT_HEALTHY_AFTER_MOVE,
+    // A move is already scheduled for this container
+    FAIL_CONTAINER_ALREADY_BEING_MOVED,
+    // Unexpected error
+    FAIL_UNEXPECTED_ERROR
+  }
 }
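Both hunks above only relocate `MoveResult`; its contract is unchanged. As a reading aid, here is a minimal sketch of how a caller might consume it. The future-based shape and the `MoveResultLogger` class are assumptions for illustration, not code from this patch:

```java
import java.util.concurrent.CompletableFuture;

import org.apache.hadoop.hdds.scm.container.balancer.MoveManager.MoveResult;

/** Hypothetical caller illustrating how a MoveResult future might be consumed. */
final class MoveResultLogger {

  void onMoveScheduled(CompletableFuture<MoveResult> future) {
    future.thenAccept(result -> {
      switch (result) {
      case COMPLETED:
        // Replication to the target and deletion from the source both finished.
        break;
      case FAIL_CONTAINER_ALREADY_BEING_MOVED:
        // Another move is in flight for this container; skip rescheduling it now.
        break;
      default:
        // Every other constant is a failure state; a real caller would log it
        // and let a later balancer iteration pick the container up again.
        break;
      }
    });
  }
}
```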
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
index 34e8b9d323fc..f4489da345e1 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -27,17 +27,6 @@
  */
 public class ContainerHealthResult {
 
-  /**
-   * All possible container health states.
-   */
-  public enum HealthState {
-    HEALTHY,
-    UNHEALTHY,
-    UNDER_REPLICATED,
-    OVER_REPLICATED,
-    MIS_REPLICATED
-  }
-
   private final ContainerInfo containerInfo;
   private final HealthState healthState;
   private final List<Pair<DatanodeDetails, SCMCommand<?>>> commands = new ArrayList<>();
@@ -402,4 +391,15 @@ public void setIsSafelyOverReplicated(boolean isSafelyOverReplicated) {
       this.isSafelyOverReplicated = isSafelyOverReplicated;
     }
   }
+
+  /**
+   * All possible container health states.
+   */
+  public enum HealthState {
+    HEALTHY,
+    UNHEALTHY,
+    UNDER_REPLICATED,
+    OVER_REPLICATED,
+    MIS_REPLICATED
+  }
 }
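For orientation, a small sketch of how the relocated `HealthState` enum is typically branched on; the helper class below is hypothetical:

```java
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult.HealthState;

final class HealthStateCheck {
  private HealthStateCheck() {
  }

  /** Hypothetical helper: only HEALTHY needs no follow-up replication work. */
  static boolean needsReplicationWork(HealthState state) {
    return state != HealthState.HEALTHY;
  }
}
```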
+ */ + public enum HealthState { + HEALTHY, + UNHEALTHY, + UNDER_REPLICATED, + OVER_REPLICATED, + MIS_REPLICATED + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaOp.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaOp.java index 5d9e1471653f..3c3e6fd712b2 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaOp.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerReplicaOp.java @@ -25,13 +25,6 @@ */ public class ContainerReplicaOp { - /** - * Enum representing different types of pending Ops. - */ - public enum PendingOpType { - ADD, DELETE - } - private final PendingOpType opType; private final DatanodeDetails target; private final int replicaIndex; @@ -72,4 +65,10 @@ public long getDeadlineEpochMillis() { return deadlineEpochMillis; } + /** + * Enum representing different types of pending Ops. + */ + public enum PendingOpType { + ADD, DELETE + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java index f8312a62db5a..8c41e89f3658 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java @@ -1132,14 +1132,6 @@ public static class ReplicationManagerConfiguration + "retried.") private long eventTimeout = Duration.ofMinutes(12).toMillis(); - public void setInterval(Duration interval) { - this.interval = interval; - } - - public void setEventTimeout(Duration timeout) { - this.eventTimeout = timeout.toMillis(); - } - /** * When a command has a deadline in SCM, the datanode timeout should be * slightly less. This duration is the number of seconds to subtract from @@ -1157,14 +1149,6 @@ public void setEventTimeout(Duration timeout) { + "should have expired.") private long datanodeTimeoutOffset = Duration.ofMinutes(6).toMillis(); - public long getDatanodeTimeoutOffset() { - return datanodeTimeoutOffset; - } - - public void setDatanodeTimeoutOffset(long val) { - datanodeTimeoutOffset = val; - } - /** * The number of container replica which must be available for a node to * enter maintenance. @@ -1181,10 +1165,6 @@ public void setDatanodeTimeoutOffset(long val) { " entering maintenance state until a new replica is created.") private int maintenanceReplicaMinimum = 2; - public void setMaintenanceReplicaMinimum(int replicaCount) { - this.maintenanceReplicaMinimum = replicaCount; - } - /** * Defines how many redundant replicas of a container must be online for a * node to enter maintenance. Currently, only used for EC containers. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
index e4e5a6938d28..75bffa01e242 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManagerUtil.java
@@ -46,12 +46,11 @@
  */
 public final class ReplicationManagerUtil {
 
+  private static final Logger LOG = LoggerFactory.getLogger(ReplicationManagerUtil.class);
+
   private ReplicationManagerUtil() {
   }
 
-  private static final Logger LOG = LoggerFactory.getLogger(
-      ReplicationManagerUtil.class);
-
   /**
    * Using the passed placement policy attempt to select a list of datanodes to
    * use as new targets. If the placement policy is unable to select enough
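This file and HASecurityUtils below converge on the same utility-class shape; a generic sketch of the target member order (the class name here is made up):

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Hypothetical utility class: constants first, then the private constructor. */
public final class ExampleUtil {

  private static final Logger LOG = LoggerFactory.getLogger(ExampleUtil.class);

  private ExampleUtil() {
    // no instances
  }
}
```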
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java
index 593d965a6d16..b1ff5f4ae488 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/states/ContainerStateMap.java
@@ -73,6 +73,25 @@ public class ContainerStateMap {
   private static final Logger LOG =
       LoggerFactory.getLogger(ContainerStateMap.class);
 
+  /**
+   * Map {@link LifeCycleState} to {@link ContainerInfo}.
+   * Note that a {@link ContainerInfo} can only exist in at most one of the {@link LifeCycleState}s.
+   */
+  private final ContainerAttribute<LifeCycleState> lifeCycleStateMap = new ContainerAttribute<>(LifeCycleState.class);
+  /**
+   * Map {@link ReplicationType} to {@link ContainerInfo}.
+   * Note that a {@link ContainerInfo} can only exist in at most one of the {@link ReplicationType}s.
+   */
+  private final ContainerAttribute<ReplicationType> typeMap = new ContainerAttribute<>(ReplicationType.class);
+  /**
+   * Map {@link ContainerID} to ({@link ContainerInfo} and {@link ContainerReplica}).
+   * Note that the following sets are exactly the same:
+   * 1. The {@link ContainerInfo} in this map.
+   * 2. The {@link ContainerInfo} in the union of all the states in {@link #lifeCycleStateMap}.
+   * 3. The {@link ContainerInfo} in the union of all the types in {@link #typeMap}.
+   */
+  private final ContainerMap containerMap = new ContainerMap();
+
   /**
    * Two levels map.
    * Outer container map: {@link ContainerID} -> {@link ContainerEntry} (info and replicas)
@@ -141,25 +160,6 @@ ContainerReplica removeReplica(ContainerID containerID, DatanodeID datanodeID) {
     }
   }
 
-  /**
-   * Map {@link LifeCycleState} to {@link ContainerInfo}.
-   * Note that a {@link ContainerInfo} can only exists in at most one of the {@link LifeCycleState}s.
-   */
-  private final ContainerAttribute<LifeCycleState> lifeCycleStateMap = new ContainerAttribute<>(LifeCycleState.class);
-  /**
-   * Map {@link ReplicationType} to {@link ContainerInfo}.
-   * Note that a {@link ContainerInfo} can only exists in at most one of the {@link ReplicationType}s.
-   */
-  private final ContainerAttribute<ReplicationType> typeMap = new ContainerAttribute<>(ReplicationType.class);
-  /**
-   * Map {@link ContainerID} to ({@link ContainerInfo} and {@link ContainerReplica}).
-   * Note that the following sets are exactly the same
-   * 1. The {@link ContainerInfo} in this map.
-   * 2. The {@link ContainerInfo} in the union of all the states in {@link #lifeCycleStateMap}.
-   * 2. The {@link ContainerInfo} in the union of all the types in {@link #typeMap}.
-   */
-  private final ContainerMap containerMap = new ContainerMap();
-
   /**
    * Create a ContainerStateMap.
    */
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
index 377c2016d04a..cec412e9310b 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/HASecurityUtils.java
@@ -60,12 +60,11 @@
  */
 public final class HASecurityUtils {
 
+  private static final Logger LOG = LoggerFactory.getLogger(HASecurityUtils.class);
+
   private HASecurityUtils() {
   }
 
-  private static final Logger LOG =
-      LoggerFactory.getLogger(HASecurityUtils.class);
-
   /**
    * Initialize Security which generates public, private key pair and get SCM
    * signed certificate and persist to local disk.
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
index 32ad973cd13e..50aafe189edb 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java
@@ -49,13 +49,6 @@ public final class SCMContext {
   public static final long INVALID_TERM = -1;
 
   private final String threadNamePrefix;
 
-  /**
-   * Used by non-HA mode SCM, Recon and Unit Tests.
-   */
-  public static SCMContext emptyContext() {
-    return new SCMContext.Builder().buildMaybeInvalid();
-  }
-
   /**
    * Raft related info.
    */
@@ -85,6 +78,13 @@ private SCMContext(Builder b) {
     threadNamePrefix = b.threadNamePrefix;
   }
 
+  /**
+   * Used by non-HA mode SCM, Recon and Unit Tests.
+   */
+  public static SCMContext emptyContext() {
+    return new SCMContext.Builder().buildMaybeInvalid();
+  }
+
   /**
    * @param leader : is leader or not
    * @param newTerm : term if current SCM becomes leader
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java
index 2e680bd3c979..c9c44a7ef5e0 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAMetrics.java
@@ -33,36 +33,6 @@
 @Metrics(about = "SCM HA metrics", context = OzoneConsts.OZONE)
 public final class SCMHAMetrics implements MetricsSource {
 
-  /**
-   * Metrics value holder.
-   */
-  private static final class SCMHAMetricsInfo {
-
-    private static final MetricsInfo SCM_MANAGER_HA_LEADER_STATE =
-        Interns.info("SCMHALeaderState", "Leader active " +
-            "state of SCM node (1 leader, 0 follower");
-    private static final MetricsInfo NODE_ID = Interns.info("NodeId",
-        "SCM node Id");
-    private int scmHALeaderState;
-    private String nodeId;
-
-    public int getScmHALeaderState() {
-      return scmHALeaderState;
-    }
-
-    public void setScmHALeaderState(int scmHALeaderState) {
-      this.scmHALeaderState = scmHALeaderState;
-    }
-
-    public String getNodeId() {
-      return nodeId;
-    }
-
-    public void setNodeId(String nodeId) {
-      this.nodeId = nodeId;
-    }
-  }
-
   private static final String SOURCE_NAME = SCMHAMetrics.class.getSimpleName();
   private final SCMHAMetricsInfo scmHAMetricsInfo = new SCMHAMetricsInfo();
   private final String currNodeId;
@@ -114,4 +84,33 @@ public int getSCMHAMetricsInfoLeaderState() {
     return scmHAMetricsInfo.getScmHALeaderState();
   }
 
+  /**
+   * Metrics value holder.
+   */
+  private static final class SCMHAMetricsInfo {
+
+    private static final MetricsInfo SCM_MANAGER_HA_LEADER_STATE =
+        Interns.info("SCMHALeaderState", "Leader active " +
+            "state of SCM node (1 leader, 0 follower)");
+    private static final MetricsInfo NODE_ID = Interns.info("NodeId",
+        "SCM node Id");
+    private int scmHALeaderState;
+    private String nodeId;
+
+    public int getScmHALeaderState() {
+      return scmHALeaderState;
+    }
+
+    public void setScmHALeaderState(int scmHALeaderState) {
+      this.scmHALeaderState = scmHALeaderState;
+    }
+
+    public String getNodeId() {
+      return nodeId;
+    }
+
+    public void setNodeId(String nodeId) {
+      this.nodeId = nodeId;
+    }
+  }
 }
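The relocated factory keeps its documented role; a test-style sketch of its use (the helper class is hypothetical, not from this patch):

```java
import org.apache.hadoop.hdds.scm.ha.SCMContext;

final class EmptyContextExample {
  private EmptyContextExample() {
  }

  static SCMContext newTestContext() {
    // Non-HA SCM, Recon and unit tests take this shortcut instead of
    // wiring up a real Raft-backed context.
    return SCMContext.emptyContext();
  }
}
```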
+ */ + private static final class SCMHAMetricsInfo { + + private static final MetricsInfo SCM_MANAGER_HA_LEADER_STATE = + Interns.info("SCMHALeaderState", "Leader active " + + "state of SCM node (1 leader, 0 follower"); + private static final MetricsInfo NODE_ID = Interns.info("NodeId", + "SCM node Id"); + private int scmHALeaderState; + private String nodeId; + + public int getScmHALeaderState() { + return scmHALeaderState; + } + + public void setScmHALeaderState(int scmHALeaderState) { + this.scmHALeaderState = scmHALeaderState; + } + + public String getNodeId() { + return nodeId; + } + + public void setNodeId(String nodeId) { + this.nodeId = nodeId; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java index c3331ca72838..03cd49cebbd8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SequenceIdGenerator.java @@ -76,13 +76,6 @@ public class SequenceIdGenerator { private static final long INVALID_SEQUENCE_ID = 0; - static class Batch { - // The upper bound of the batch. - private long lastId = INVALID_SEQUENCE_ID; - // The next id to be allocated in this batch. - private long nextId = lastId + 1; - } - private final Map sequenceIdToBatchMap; private final Lock lock; @@ -433,4 +426,11 @@ public static void upgradeToCertificateSequenceId( sequenceIdTable.delete(ROOT_CERTIFICATE_ID); } } + + static class Batch { + // The upper bound of the batch. + private long lastId = INVALID_SEQUENCE_ID; + // The next id to be allocated in this batch. + private long nextId = lastId + 1; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java index 5fb0c2188c9c..4733c0629ded 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java @@ -72,6 +72,8 @@ */ public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor { + private static final Logger LOG = LoggerFactory.getLogger(DatanodeAdminMonitorImpl.class); + private EventPublisher eventQueue; private NodeManager nodeManager; private ReplicationManager replicationManager; @@ -86,6 +88,12 @@ public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor { private long unClosedContainers = 0; private long underReplicatedContainers = 0; + private Map containerStateByHost; + + // The number of containers for each of under replicated and unhealthy + // that will be logged in detail each time a node is checked. + private final int containerDetailsLoggingLimit; + /** * Inner class for snapshot of Datanode ContainerState in * Decommissioning and Maintenance mode workflow. @@ -130,14 +138,6 @@ public void setContainersReplicatedOnNode(List underReplicated, Lis } } - private Map containerStateByHost; - - private static final Logger LOG = - LoggerFactory.getLogger(DatanodeAdminMonitorImpl.class); - // The number of containers for each of under replicated and unhealthy - // that will be logged in detail each time a node is checked. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionMetrics.java
index dbe4b37d26b2..f5adcf45cc20 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionMetrics.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionMetrics.java
@@ -60,82 +60,6 @@ public final class NodeDecommissionMetrics implements MetricsSource {
   @Metric("Number of containers sufficiently replicated in tracked nodes.")
   private MutableGaugeLong containersSufficientlyReplicatedTotal;
 
-  /**
-   * Inner class for snapshot of Datanode ContainerState in
-   * Decommissioning and Maintenance mode workflow.
-   */
-  public static final class ContainerStateInWorkflow {
-    private long sufficientlyReplicated = 0;
-    private long unclosedContainers = 0;
-    private long underReplicatedContainers = 0;
-    private String host = "";
-    private long pipelinesWaitingToClose = 0;
-    private long startTime = 0;
-
-    private static final MetricsInfo HOST_UNDER_REPLICATED = Interns.info(
-        "UnderReplicatedDN",
-        "Number of under-replicated containers "
-            + "for host in decommissioning and "
-            + "maintenance mode");
-
-    private static final MetricsInfo HOST_PIPELINES_TO_CLOSE = Interns.info(
-        "PipelinesWaitingToCloseDN",
-        "Number of pipelines waiting to close for "
-            + "host in decommissioning and "
-            + "maintenance mode");
-
-    private static final MetricsInfo HOST_SUFFICIENTLY_REPLICATED = Interns
-        .info(
-            "SufficientlyReplicatedDN",
-            "Number of sufficiently replicated containers "
-                + "for host in decommissioning and "
-                + "maintenance mode");
-
-    private static final MetricsInfo HOST_UNCLOSED_CONTAINERS = Interns.info("UnclosedContainersDN",
-        "Number of containers not fully closed for host in decommissioning and maintenance mode");
-
-    private static final MetricsInfo HOST_START_TIME = Interns.info("StartTimeDN",
-        "Time at which decommissioning was started");
-
-    public ContainerStateInWorkflow(String host,
-        long sufficiently,
-        long under,
-        long unclosed,
-        long pipelinesToClose,
-        long startTime) {
-      this.host = host;
-      sufficientlyReplicated = sufficiently;
-      underReplicatedContainers = under;
-      unclosedContainers = unclosed;
-      pipelinesWaitingToClose = pipelinesToClose;
-      this.startTime = startTime;
-    }
-
-    public String getHost() {
-      return host;
-    }
-
-    public long getSufficientlyReplicated() {
-      return sufficientlyReplicated;
-    }
-
-    public long getPipelinesWaitingToClose() {
-      return pipelinesWaitingToClose;
-    }
-
-    public long getUnderReplicatedContainers() {
-      return underReplicatedContainers;
-    }
-
-    public long getUnclosedContainers() {
-      return unclosedContainers;
-    }
-
-    public long getStartTime() {
-      return startTime;
-    }
-  }
-
   private MetricsRegistry registry;
 
   private Map<String, ContainerStateInWorkflow> metricsByHost;
@@ -293,4 +217,80 @@ public Long getUnClosedContainersByHost(String host) {
     return workflowMetrics == null ? null
         : workflowMetrics.getUnclosedContainers();
   }
+
+  /**
+   * Inner class for snapshot of Datanode ContainerState in
+   * Decommissioning and Maintenance mode workflow.
+   */
+  public static final class ContainerStateInWorkflow {
+    private long sufficientlyReplicated = 0;
+    private long unclosedContainers = 0;
+    private long underReplicatedContainers = 0;
+    private String host = "";
+    private long pipelinesWaitingToClose = 0;
+    private long startTime = 0;
+
+    private static final MetricsInfo HOST_UNDER_REPLICATED = Interns.info(
+        "UnderReplicatedDN",
+        "Number of under-replicated containers "
+            + "for host in decommissioning and "
+            + "maintenance mode");
+
+    private static final MetricsInfo HOST_PIPELINES_TO_CLOSE = Interns.info(
+        "PipelinesWaitingToCloseDN",
+        "Number of pipelines waiting to close for "
+            + "host in decommissioning and "
+            + "maintenance mode");
+
+    private static final MetricsInfo HOST_SUFFICIENTLY_REPLICATED = Interns
+        .info(
+            "SufficientlyReplicatedDN",
+            "Number of sufficiently replicated containers "
+                + "for host in decommissioning and "
+                + "maintenance mode");
+
+    private static final MetricsInfo HOST_UNCLOSED_CONTAINERS = Interns.info("UnclosedContainersDN",
+        "Number of containers not fully closed for host in decommissioning and maintenance mode");
+
+    private static final MetricsInfo HOST_START_TIME = Interns.info("StartTimeDN",
+        "Time at which decommissioning was started");
+
+    public ContainerStateInWorkflow(String host,
+        long sufficiently,
+        long under,
+        long unclosed,
+        long pipelinesToClose,
+        long startTime) {
+      this.host = host;
+      sufficientlyReplicated = sufficiently;
+      underReplicatedContainers = under;
+      unclosedContainers = unclosed;
+      pipelinesWaitingToClose = pipelinesToClose;
+      this.startTime = startTime;
+    }
+
+    public String getHost() {
+      return host;
+    }
+
+    public long getSufficientlyReplicated() {
+      return sufficientlyReplicated;
+    }
+
+    public long getPipelinesWaitingToClose() {
+      return pipelinesWaitingToClose;
+    }
+
+    public long getUnderReplicatedContainers() {
+      return underReplicatedContainers;
+    }
+
+    public long getUnclosedContainers() {
+      return unclosedContainers;
+    }
+
+    public long getStartTime() {
+      return startTime;
+    }
+  }
 }
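A small sketch of the snapshot class in use, matching the constructor order shown above (host, sufficiently replicated, under-replicated, unclosed, pipelines waiting to close, start time); all values are invented:

```java
import org.apache.hadoop.hdds.scm.node.NodeDecommissionMetrics.ContainerStateInWorkflow;

final class SnapshotExample {
  private SnapshotExample() {
  }

  static ContainerStateInWorkflow sampleSnapshot() {
    return new ContainerStateInWorkflow(
        "dn-1.example.com",         // host
        120,                        // sufficiently replicated
        3,                          // under-replicated
        1,                          // not fully closed
        2,                          // pipelines waiting to close
        System.currentTimeMillis()  // decommission start time
    );
  }
}
```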
+ */ + public static final class ContainerStateInWorkflow { + private long sufficientlyReplicated = 0; + private long unclosedContainers = 0; + private long underReplicatedContainers = 0; + private String host = ""; + private long pipelinesWaitingToClose = 0; + private long startTime = 0; + + private static final MetricsInfo HOST_UNDER_REPLICATED = Interns.info( + "UnderReplicatedDN", + "Number of under-replicated containers " + + "for host in decommissioning and " + + "maintenance mode"); + + private static final MetricsInfo HOST_PIPELINES_TO_CLOSE = Interns.info( + "PipelinesWaitingToCloseDN", + "Number of pipelines waiting to close for " + + "host in decommissioning and " + + "maintenance mode"); + + private static final MetricsInfo HOST_SUFFICIENTLY_REPLICATED = Interns + .info( + "SufficientlyReplicatedDN", + "Number of sufficiently replicated containers " + + "for host in decommissioning and " + + "maintenance mode"); + + private static final MetricsInfo HOST_UNCLOSED_CONTAINERS = Interns.info("UnclosedContainersDN", + "Number of containers not fully closed for host in decommissioning and maintenance mode"); + + private static final MetricsInfo HOST_START_TIME = Interns.info("StartTimeDN", + "Time at which decommissioning was started"); + + public ContainerStateInWorkflow(String host, + long sufficiently, + long under, + long unclosed, + long pipelinesToClose, + long startTime) { + this.host = host; + sufficientlyReplicated = sufficiently; + underReplicatedContainers = under; + unclosedContainers = unclosed; + pipelinesWaitingToClose = pipelinesToClose; + this.startTime = startTime; + } + + public String getHost() { + return host; + } + + public long getSufficientlyReplicated() { + return sufficientlyReplicated; + } + + public long getPipelinesWaitingToClose() { + return pipelinesWaitingToClose; + } + + public long getUnderReplicatedContainers() { + return underReplicatedContainers; + } + + public long getUnclosedContainers() { + return unclosedContainers; + } + + public long getStartTime() { + return startTime; + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java index eab514f4c8a3..71644cda864a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStateManager.java @@ -85,13 +85,6 @@ */ public class NodeStateManager implements Runnable, Closeable { - /** - * Node's life cycle events. - */ - private enum NodeLifeCycleEvent { - TIMEOUT, RESTORE, RESURRECT, LAYOUT_MISMATCH, LAYOUT_MATCH - } - private static final Logger LOG = LoggerFactory .getLogger(NodeStateManager.class); @@ -1076,4 +1069,11 @@ ScheduledFuture unpause() { protected void removeNode(DatanodeID datanodeID) { nodeStateMap.removeNode(datanodeID); } + + /** + * Node's life cycle events. 
+ */ + private enum NodeLifeCycleEvent { + TIMEOUT, RESTORE, RESURRECT, LAYOUT_MISMATCH, LAYOUT_MATCH + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStatus.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStatus.java index 6e6667e650cf..fab56d92c268 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStatus.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeStatus.java @@ -59,6 +59,27 @@ public final class NodeStatus { CONSTANTS = Collections.unmodifiableMap(map); } + private static final Set OUT_OF_SERVICE_STATES = Collections.unmodifiableSet( + EnumSet.of(DECOMMISSIONING, DECOMMISSIONED, ENTERING_MAINTENANCE, IN_MAINTENANCE)); + + private static final NodeStatus IN_SERVICE_HEALTHY = valueOf(IN_SERVICE, HEALTHY); + + private static final NodeStatus IN_SERVICE_HEALTHY_READONLY = valueOf(IN_SERVICE, HEALTHY_READONLY); + + private static final Set MAINTENANCE_STATES = Collections.unmodifiableSet( + EnumSet.of(ENTERING_MAINTENANCE, IN_MAINTENANCE)); + + private static final Set DECOMMISSION_STATES = Collections.unmodifiableSet( + EnumSet.of(DECOMMISSIONING, DECOMMISSIONED)); + + private static final NodeStatus IN_SERVICE_STALE = NodeStatus.valueOf(IN_SERVICE, STALE); + + private static final NodeStatus IN_SERVICE_DEAD = NodeStatus.valueOf(IN_SERVICE, DEAD); + + private final NodeState health; + private final NodeOperationalState operationalState; + private final long opStateExpiryEpochSeconds; + /** @return a {@link NodeStatus} object with {@link #opStateExpiryEpochSeconds} == 0. */ public static NodeStatus valueOf(NodeOperationalState op, NodeState health) { return valueOf(op, health, 0); @@ -72,9 +93,6 @@ public static NodeStatus valueOf(NodeOperationalState op, NodeState health, long : new NodeStatus(health, op, opExpiryEpochSeconds); } - private static final Set MAINTENANCE_STATES = Collections.unmodifiableSet( - EnumSet.of(ENTERING_MAINTENANCE, IN_MAINTENANCE)); - /** * @return the set consists of {@link NodeOperationalState#ENTERING_MAINTENANCE} * and {@link NodeOperationalState#IN_MAINTENANCE}. @@ -83,9 +101,6 @@ public static Set maintenanceStates() { return MAINTENANCE_STATES; } - private static final Set DECOMMISSION_STATES = Collections.unmodifiableSet( - EnumSet.of(DECOMMISSIONING, DECOMMISSIONED)); - /** * @return the set consists of {@link NodeOperationalState#DECOMMISSIONING} * and {@link NodeOperationalState#DECOMMISSIONED}. @@ -94,9 +109,6 @@ public static Set decommissionStates() { return DECOMMISSION_STATES; } - private static final Set OUT_OF_SERVICE_STATES = Collections.unmodifiableSet( - EnumSet.of(DECOMMISSIONING, DECOMMISSIONED, ENTERING_MAINTENANCE, IN_MAINTENANCE)); - /** * @return the set consists of {@link NodeOperationalState#DECOMMISSIONING}, * {@link NodeOperationalState#DECOMMISSIONED}, @@ -107,38 +119,26 @@ public static Set outOfServiceStates() { return OUT_OF_SERVICE_STATES; } - private static final NodeStatus IN_SERVICE_HEALTHY = valueOf(IN_SERVICE, HEALTHY); - /** @return the status of {@link NodeOperationalState#IN_SERVICE} and {@link NodeState#HEALTHY}. */ public static NodeStatus inServiceHealthy() { return IN_SERVICE_HEALTHY; } - private static final NodeStatus IN_SERVICE_HEALTHY_READONLY = valueOf(IN_SERVICE, HEALTHY_READONLY); - /** @return the status of {@link NodeOperationalState#IN_SERVICE} and {@link NodeState#HEALTHY_READONLY}. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java
index b1698628514a..c1f14d8cc655 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/ECPipelineProvider.java
@@ -46,6 +46,15 @@ public class ECPipelineProvider extends PipelineProvider<ECReplicationConfig> {
   private static final Logger LOG =
       LoggerFactory.getLogger(ECPipelineProvider.class);
 
+  static final Comparator<NodeStatus> CREATE_FOR_READ_COMPARATOR = (left, right) -> {
+    final int healthy = Boolean.compare(right.isHealthy(), left.isHealthy());
+    if (healthy != 0) {
+      return healthy;
+    }
+    final int dead = Boolean.compare(left.isDead(), right.isDead());
+    return dead != 0 ? dead : left.getOperationalState().compareTo(right.getOperationalState());
+  };
+
   // TODO - EC Placement Policy. Standard Network Aware topology will not work
   //  for EC as it stands. We may want an "as many racks as possible"
   //  policy. HDDS-5326.
@@ -97,15 +106,6 @@ protected Pipeline create(ECReplicationConfig replicationConfig,
     return createPipelineInternal(replicationConfig, nodes, dnIndexes);
   }
 
-  static final Comparator<NodeStatus> CREATE_FOR_READ_COMPARATOR = (left, right) -> {
-    final int healthy = Boolean.compare(right.isHealthy(), left.isHealthy());
-    if (healthy != 0) {
-      return healthy;
-    }
-    final int dead = Boolean.compare(left.isDead(), right.isDead());
-    return dead != 0 ? dead : left.getOperationalState().compareTo(right.getOperationalState());
-  };
-
   @Override
   public Pipeline createForRead(
       ECReplicationConfig replicationConfig,
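A sketch of the relocated comparator in action: healthiest first, dead last, ties broken by operational state. The `Comparator<NodeStatus>` element type is taken from the reconstruction above (the original declaration lost its type parameter, so adjust if the real one differs), and the example sits in the same package because the field is package-private:

```java
package org.apache.hadoop.hdds.scm.pipeline;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hdds.scm.node.NodeStatus;

/** Hypothetical same-package helper demonstrating the sort order. */
final class CreateForReadOrderingExample {
  private CreateForReadOrderingExample() {
  }

  static List<NodeStatus> ordered() {
    List<NodeStatus> statuses = new ArrayList<>();
    statuses.add(NodeStatus.inServiceDead());
    statuses.add(NodeStatus.inServiceHealthy());
    // After sorting, the healthy entry precedes the dead one.
    statuses.sort(ECPipelineProvider.CREATE_FOR_READ_COMPARATOR);
    return statuses;
  }
}
```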
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java
index 91b4f26a86e0..1452779035f1 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/WritableECContainerProvider.java
@@ -280,14 +280,6 @@ public static class WritableECContainerProviderConfig
         tags = ConfigTag.STORAGE)
     private int minimumPipelines = 5;
 
-    public int getMinimumPipelines() {
-      return minimumPipelines;
-    }
-
-    public void setMinimumPipelines(int minPipelines) {
-      this.minimumPipelines = minPipelines;
-    }
-
     private static final String PIPELINE_PER_VOLUME_FACTOR_KEY =
         "pipeline.per.volume.factor";
     private static final double PIPELINE_PER_VOLUME_FACTOR_DEFAULT = 1;
@@ -304,6 +296,14 @@ public void setMinimumPipelines(int minPipelines) {
     )
     private double pipelinePerVolumeFactor = PIPELINE_PER_VOLUME_FACTOR_DEFAULT;
 
+    public int getMinimumPipelines() {
+      return minimumPipelines;
+    }
+
+    public void setMinimumPipelines(int minPipelines) {
+      this.minimumPipelines = minPipelines;
+    }
+
     public double getPipelinePerVolumeFactor() {
       return pipelinePerVolumeFactor;
     }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMPolicyProvider.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMPolicyProvider.java
index 147920ab641d..0faf49d3fb76 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMPolicyProvider.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMPolicyProvider.java
@@ -55,15 +55,6 @@ public final class SCMPolicyProvider extends PolicyProvider {
   private static final Supplier<SCMPolicyProvider> SUPPLIER =
       MemoizedSupplier.valueOf(SCMPolicyProvider::new);
 
-  private SCMPolicyProvider() {
-  }
-
-  @Private
-  @Unstable
-  public static SCMPolicyProvider getInstance() {
-    return SUPPLIER.get();
-  }
-
   private static final List<Service> SCM_SERVICES =
       Arrays.asList(
           new Service(
@@ -92,6 +83,15 @@ public static SCMPolicyProvider getInstance() {
           ReconfigureProtocol.class)
       );
 
+  private SCMPolicyProvider() {
+  }
+
+  @Private
+  @Unstable
+  public static SCMPolicyProvider getInstance() {
+    return SUPPLIER.get();
+  }
+
   @Override
   public Service[] getServices() {
     return SCM_SERVICES.toArray(new Service[0]);
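Finally, the two settings shown above (`minimumPipelines` and `pipeline.per.volume.factor`) jointly size the open EC pipeline pool. The exact formula lives in WritableECContainerProvider and is not part of this diff; the helper below is only an assumed reading of how such settings typically combine:

```java
final class PipelineSizingSketch {
  private PipelineSizingSketch() {
  }

  /** Assumed sizing rule: scale with volumes, but never drop below the configured minimum. */
  static int targetPipelineCount(int minimumPipelines, double perVolumeFactor, int healthyVolumes) {
    return Math.max(minimumPipelines, (int) (perVolumeFactor * healthyVolumes));
  }
}
```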