diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java index 93b09d9ce591..0839f3af9f93 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/client/ContainerOperationClient.java @@ -198,18 +198,23 @@ public ContainerWithPipeline createContainer(HddsProtos.ReplicationType type, /** * Returns a set of Nodes that meet a query criteria. * - * @param nodeStatuses - Criteria that we want the node to have. + * @param opState - The operational state we want the node to have + * eg IN_SERVICE, DECOMMISSIONED, etc + * @param nodeState - The health we want the node to have, eg HEALTHY, STALE, + * etc * @param queryScope - Query scope - Cluster or pool. * @param poolName - if it is pool, a pool name is required. * @return A set of nodes that meet the requested criteria. 
* @throws IOException */ @Override - public List queryNode(HddsProtos.NodeState - nodeStatuses, HddsProtos.QueryScope queryScope, String poolName) + public List queryNode( + HddsProtos.NodeOperationalState opState, + HddsProtos.NodeState nodeState, + HddsProtos.QueryScope queryScope, String poolName) throws IOException { - return storageContainerLocationClient.queryNode(nodeStatuses, queryScope, - poolName); + return storageContainerLocationClient.queryNode(opState, nodeState, + queryScope, poolName); } @Override diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index f96234cb14e0..88ddbe6a1380 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -151,15 +151,20 @@ ContainerWithPipeline createContainer(HddsProtos.ReplicationType type, String owner) throws IOException; /** - * Returns a set of Nodes that meet a query criteria. - * @param nodeStatuses - Criteria that we want the node to have. + * Returns a set of Nodes that meet a query criteria. Passing null for opState + * or nodeState acts like a wild card, returning all nodes in that state. + * @param opState - Operational State of the node, eg IN_SERVICE, + * DECOMMISSIONED, etc + * @param nodeState - The health that we want the node to + * have, eg HEALTHY, STALE etc + * @param queryScope - Query scope - Cluster or pool. + * @param poolName - if it is pool, a pool name is required. + * @return A set of nodes that meet the requested criteria. 
* @throws IOException */ - List queryNode(HddsProtos.NodeState nodeStatuses, - HddsProtos.QueryScope queryScope, String poolName) throws IOException; + List queryNode(HddsProtos.NodeOperationalState opState, + HddsProtos.NodeState nodeState, HddsProtos.QueryScope queryScope, + String poolName) throws IOException; /** * Allows a list of hosts to be decommissioned. The hosts are identified diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 91cd40dc3f5a..8b37c57ee1e0 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -105,12 +105,15 @@ List listContainer(long startContainerID, int count) void deleteContainer(long containerID) throws IOException; /** - * Queries a list of Node Statuses. - * @param state + * Queries a list of Node Statuses. Passing a null for either opState or + * state acts like a wildcard returning all nodes in that state. + * @param opState The node operational state + * @param state The node health * @return List of Datanodes. 
*/ - List queryNode(HddsProtos.NodeState state, - HddsProtos.QueryScope queryScope, String poolName) throws IOException; + List queryNode(HddsProtos.NodeOperationalState opState, + HddsProtos.NodeState state, HddsProtos.QueryScope queryScope, + String poolName) throws IOException; void decommissionNodes(List nodes) throws IOException; diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 99941d1c3346..0765a5608578 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -240,23 +240,34 @@ public void deleteContainer(long containerID) } /** - * Queries a list of Node Statuses. + * Queries a list of Nodes based on their operational state or health state. + * Passing a null for either value acts as a wildcard for that state. + * + * @param opState The operational state of the node + * @param nodeState The health of the node + * @return List of Datanodes. */ @Override - public List queryNode(HddsProtos.NodeState - nodeStatuses, HddsProtos.QueryScope queryScope, String poolName) + public List queryNode( + HddsProtos.NodeOperationalState opState, HddsProtos.NodeState + nodeState, HddsProtos.QueryScope queryScope, String poolName) throws IOException { // TODO : We support only cluster wide query right now. 
So ignoring checking // queryScope and poolName - Preconditions.checkNotNull(nodeStatuses); - NodeQueryRequestProto request = NodeQueryRequestProto.newBuilder() - .setState(nodeStatuses) + NodeQueryRequestProto.Builder builder = NodeQueryRequestProto.newBuilder() .setTraceID(TracingUtil.exportCurrentSpan()) - .setScope(queryScope).setPoolName(poolName).build(); + .setScope(queryScope).setPoolName(poolName); + if (opState != null) { + builder.setOpState(opState); + } + if (nodeState != null) { + builder.setState(nodeState); + } + NodeQueryRequestProto request = builder.build(); NodeQueryResponseProto response = submitRequest(Type.QueryNode, - builder -> builder.setNodeQueryRequest(request)).getNodeQueryResponse(); + builder1 -> builder1.setNodeQueryRequest(request)) + .getNodeQueryResponse(); return response.getDatanodesList(); - } /** diff --git a/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto b/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto index 00e58c0bc851..01779652118c 100644 --- a/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto +++ b/hadoop-hdds/common/src/main/proto/StorageContainerLocationProtocol.proto @@ -223,10 +223,11 @@ message ObjectStageChangeResponseProto { match the NodeState that we are requesting. */ message NodeQueryRequestProto { - required NodeState state = 1; + optional NodeState state = 1; required QueryScope scope = 2; optional string poolName = 3; // if scope is pool, then pool name is needed. 
optional string traceID = 4; + optional NodeOperationalState opState = 5; } message NodeQueryResponseProto { diff --git a/hadoop-hdds/common/src/main/proto/hdds.proto b/hadoop-hdds/common/src/main/proto/hdds.proto index 294f2b769512..f9ed49181491 100644 --- a/hadoop-hdds/common/src/main/proto/hdds.proto +++ b/hadoop-hdds/common/src/main/proto/hdds.proto @@ -118,6 +118,7 @@ enum QueryScope { message Node { required DatanodeDetailsProto nodeID = 1; repeated NodeState nodeStates = 2; + repeated NodeOperationalState nodeOperationalStates = 3; } message NodePool { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java index a001683f5ea1..9806fbb430c8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java @@ -18,7 +18,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; -import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState; import org.apache.hadoop.hdds.scm.container.ContainerManager; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; @@ -41,9 +40,9 @@ public class NodeDecommissionManager { private NodeManager nodeManager; - private PipelineManager pipeLineManager; - private ContainerManager containerManager; - private OzoneConfiguration conf; + // private PipelineManager pipeLineManager; + // private ContainerManager containerManager; + // private OzoneConfiguration conf; private boolean useHostnames; private List pendingNodes = new LinkedList<>(); @@ -161,10 +160,10 @@ private boolean validateDNPortMatch(int port, DatanodeDetails dn) { public NodeDecommissionManager(OzoneConfiguration conf, 
NodeManager nodeManager, PipelineManager pipelineManager, ContainerManager containerManager) { - this.conf = conf; this.nodeManager = nodeManager; - this.pipeLineManager = pipelineManager; - this.containerManager = containerManager; + //this.conf = conf; + //this.pipeLineManager = pipelineManager; + //this.containerManager = containerManager; useHostnames = conf.getBoolean( DFSConfigKeys.DFS_DATANODE_USE_DN_HOSTNAME, diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 53bd95d55a4d..2c55f04ce5bc 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -322,12 +322,12 @@ public NodeQueryResponseProto queryNode( throws IOException { HddsProtos.NodeState nodeState = request.getState(); - List datanodes = impl.queryNode(nodeState, + HddsProtos.NodeOperationalState opState = request.getOpState(); + List datanodes = impl.queryNode(opState, nodeState, request.getScope(), request.getPoolName()); return NodeQueryResponseProto.newBuilder() .addAllDatanodes(datanodes) .build(); - } public ObjectStageChangeResponseProto notifyObjectStageChange( diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 38d846fb5fbd..69c1d2dd3d08 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -347,7 +347,8 
@@ public void deleteContainer(long containerID) throws IOException { } @Override - public List queryNode(HddsProtos.NodeState state, + public List queryNode( + HddsProtos.NodeOperationalState opState, HddsProtos.NodeState state, HddsProtos.QueryScope queryScope, String poolName) throws IOException { @@ -356,9 +357,11 @@ public List queryNode(HddsProtos.NodeState state, } List result = new ArrayList<>(); - queryNode(state).forEach(node -> result.add(HddsProtos.Node.newBuilder() + queryNode(opState, state) + .forEach(node -> result.add(HddsProtos.Node.newBuilder() .setNodeID(node.getProtoBufMessage()) .addNodeStates(state) + .addNodeOperationalStates(opState) .build())); return result; @@ -565,12 +568,14 @@ public boolean getReplicationManagerStatus() { * operation between the * operators. * - * @param state - NodeStates. + * @param opState - NodeOperationalState + * @param state - NodeState. * @return List of Datanodes. */ - public List queryNode(HddsProtos.NodeState state) { + public List queryNode( + HddsProtos.NodeOperationalState opState, HddsProtos.NodeState state) { Preconditions.checkNotNull(state, "Node Query set cannot be null"); - return new ArrayList<>(queryNodeState(state)); + return new ArrayList<>(queryNodeState(opState, state)); } @VisibleForTesting @@ -589,14 +594,15 @@ public boolean getSafeModeStatus() { /** * Query the System for Nodes. * + * @param opState - The node operational state * @param nodeState - NodeState that we are interested in matching. * @return Set of Datanodes that match the NodeState. 
*/ - private Set queryNodeState(HddsProtos.NodeState nodeState) { + private Set queryNodeState( + HddsProtos.NodeOperationalState opState, HddsProtos.NodeState nodeState) { Set returnSet = new TreeSet<>(); - // TODO - decomm states needed List tmp = scm.getScmNodeManager() - .getNodes(null, nodeState); + .getNodes(opState, nodeState); if ((tmp != null) && (tmp.size() > 0)) { returnSet.addAll(tmp); } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/TopologySubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/TopologySubcommand.java index 42773f82308f..731a617b34c3 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/TopologySubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/TopologySubcommand.java @@ -65,7 +65,7 @@ public class TopologySubcommand implements Callable { public Void call() throws Exception { try (ScmClient scmClient = parent.createScmClient()) { for (HddsProtos.NodeState state : stateArray) { - List nodes = scmClient.queryNode(state, + List nodes = scmClient.queryNode(null, state, HddsProtos.QueryScope.CLUSTER, ""); if (nodes != null && nodes.size() > 0) { // show node state @@ -83,36 +83,40 @@ public Void call() throws Exception { // Format // Location: rack1 - // ipAddress(hostName) + // ipAddress(hostName) OperationalState private void printOrderedByLocation(List nodes) { HashMap> tree = new HashMap<>(); + HashMap state = + new HashMap<>(); + for (HddsProtos.Node node : nodes) { String location = node.getNodeID().getNetworkLocation(); if (location != null && !tree.containsKey(location)) { tree.put(location, new TreeSet<>()); } - tree.get(location).add(DatanodeDetails.getFromProtoBuf(node.getNodeID())); + DatanodeDetails dn = DatanodeDetails.getFromProtoBuf(node.getNodeID()); + tree.get(location).add(dn); + state.put(dn, node.getNodeOperationalStates(0)); } ArrayList locations = new ArrayList<>(tree.keySet()); Collections.sort(locations); 
locations.forEach(location -> { System.out.println("Location: " + location); - tree.get(location).forEach(node -> { - System.out.println(" " + node.getIpAddress() + "(" + node.getHostName() - + ")"); + tree.get(location).forEach(n -> { + System.out.println(" " + n.getIpAddress() + "(" + n.getHostName() + + ") "+state.get(n)); }); }); } - - // Format "ipAddress(hostName) networkLocation" + // Format "ipAddress(hostName) OperationalState networkLocation" private void printNodesWithLocation(Collection nodes) { nodes.forEach(node -> { System.out.print(" " + node.getNodeID().getIpAddress() + "(" + node.getNodeID().getHostName() + ")"); - System.out.println(" " + + System.out.println(" " + node.getNodeOperationalStates(0) + " " + (node.getNodeID().getNetworkLocation() != null ? node.getNodeID().getNetworkLocation() : "NA")); }); diff --git a/hadoop-ozone/dist/src/main/smoketest/topology/scmcli.robot b/hadoop-ozone/dist/src/main/smoketest/topology/scmcli.robot index 823981d15f90..6267e09b3dbb 100644 --- a/hadoop-ozone/dist/src/main/smoketest/topology/scmcli.robot +++ b/hadoop-ozone/dist/src/main/smoketest/topology/scmcli.robot @@ -25,8 +25,8 @@ Resource ../commonlib.robot *** Test Cases *** Run printTopology ${output} = Execute ozone scmcli printTopology - Should contain ${output} 10.5.0.7(ozone-topology_datanode_4_1.ozone-topology_net) /rack2 + Should contain ${output} 10.5.0.7(ozone-topology_datanode_4_1.ozone-topology_net) IN_SERVICE /rack2 Run printTopology -o ${output} = Execute ozone scmcli printTopology -o Should contain ${output} Location: /rack2 - Should contain ${output} 10.5.0.7(ozone-topology_datanode_4_1.ozone-topology_net) + Should contain ${output} 10.5.0.7(ozone-topology_datanode_4_1.ozone-topology_net) IN_SERVICE diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java index c9b8c89e04da..eae76411e6fb 100644 
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/node/TestQueryNode.java @@ -16,6 +16,9 @@ */ package org.apache.hadoop.ozone.scm.node; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.scm.node.NodeManager; +import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.MiniOzoneCluster; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -45,6 +48,10 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.DEAD; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.STALE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos. + NodeOperationalState.IN_SERVICE; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos. + NodeOperationalState.IN_MAINTENANCE; import static org.apache.hadoop.hdds.scm.ScmConfigKeys .OZONE_SCM_DEADNODE_INTERVAL; @@ -96,7 +103,7 @@ public void tearDown() throws Exception { @Test public void testHealthyNodesCount() throws Exception { - List nodes = scmClient.queryNode(HEALTHY, + List nodes = scmClient.queryNode(null, HEALTHY, HddsProtos.QueryScope.CLUSTER, ""); assertEquals("Expected live nodes", numOfDatanodes, nodes.size()); @@ -111,7 +118,7 @@ public void testStaleNodesCount() throws Exception { cluster.getStorageContainerManager().getNodeCount(STALE) == 2, 100, 4 * 1000); - int nodeCount = scmClient.queryNode(STALE, + int nodeCount = scmClient.queryNode(null, STALE, HddsProtos.QueryScope.CLUSTER, "").size(); assertEquals("Mismatch of expected nodes count", 2, nodeCount); @@ -120,13 +127,59 @@ public void testStaleNodesCount() throws Exception { 100, 4 * 1000); // Assert that we don't find any stale nodes. 
- nodeCount = scmClient.queryNode(STALE, + nodeCount = scmClient.queryNode(null, STALE, HddsProtos.QueryScope.CLUSTER, "").size(); assertEquals("Mismatch of expected nodes count", 0, nodeCount); // Assert that we find the expected number of dead nodes. - nodeCount = scmClient.queryNode(DEAD, + nodeCount = scmClient.queryNode(null, DEAD, HddsProtos.QueryScope.CLUSTER, "").size(); assertEquals("Mismatch of expected nodes count", 2, nodeCount); } + + @Test + public void testNodeOperationalStates() throws Exception { + StorageContainerManager scm = cluster.getStorageContainerManager(); + NodeManager nm = scm.getScmNodeManager(); + + // All nodes should be returned as they are all in service + int nodeCount = scmClient.queryNode(IN_SERVICE, HEALTHY, + HddsProtos.QueryScope.CLUSTER, "").size(); + assertEquals(numOfDatanodes, nodeCount); + + // null acts as wildcard for opState + nodeCount = scmClient.queryNode(null, HEALTHY, + HddsProtos.QueryScope.CLUSTER, "").size(); + assertEquals(numOfDatanodes, nodeCount); + + // null acts as wildcard for nodeState + nodeCount = scmClient.queryNode(IN_SERVICE, null, + HddsProtos.QueryScope.CLUSTER, "").size(); + assertEquals(numOfDatanodes, nodeCount); + + // Both null - should return all nodes + nodeCount = scmClient.queryNode(null, null, + HddsProtos.QueryScope.CLUSTER, "").size(); + assertEquals(numOfDatanodes, nodeCount); + + // No node should be returned + nodeCount = scmClient.queryNode(IN_MAINTENANCE, HEALTHY, + HddsProtos.QueryScope.CLUSTER, "").size(); + assertEquals(0, nodeCount); + + // Test all operational states by looping over them all and setting the + // state manually. 
+ DatanodeDetails node = nm.getAllNodes().get(0); + for (HddsProtos.NodeOperationalState s : + HddsProtos.NodeOperationalState.values()) { + nm.setNodeOperationalState(node, s); + nodeCount = scmClient.queryNode(s, HEALTHY, + HddsProtos.QueryScope.CLUSTER, "").size(); + if (s == IN_SERVICE) { + assertEquals(5, nodeCount); + } else { + assertEquals(1, nodeCount); + } + } + } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 0cd087eee236..902fe6aed803 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -2394,8 +2394,8 @@ public List getServiceList() throws IOException { .setType(ServicePort.Type.RPC) .setValue(scmAddr.getPort()).build()); services.add(scmServiceInfoBuilder.build()); - - List nodes = scmContainerClient.queryNode(HEALTHY, + List nodes = scmContainerClient.queryNode( + HddsProtos.NodeOperationalState.IN_SERVICE, HEALTHY, HddsProtos.QueryScope.CLUSTER, ""); for (HddsProtos.Node node : nodes) {