diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
index 120535405ecd..402398e36c3f 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
@@ -20,10 +20,12 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.annotation.InterfaceStability;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerReplicaInfo;
 import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
 import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
@@ -179,6 +181,14 @@ ContainerWithPipeline createContainer(HddsProtos.ReplicationType type,
       HddsProtos.ReplicationFactor replicationFactor,
       String owner) throws IOException;
 
+  /**
+   * Gets the list of underReplicated and unClosed containers on a decommissioning node.
+   *
+   * @param dn - Datanode detail
+   * @return Lists of underReplicated and unClosed containers
+   */
+  Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn) throws IOException;
+
   /**
    * Returns a set of Nodes that meet a query criteria. Passing null for opState
    * or nodeState acts like a wild card, returning all nodes in that state.
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index be0f41b62295..dabdc0b82298 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -19,6 +19,7 @@
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
@@ -220,6 +221,14 @@ List<ContainerInfo> listContainer(long startContainerID,
   */
  void deleteContainer(long containerID) throws IOException;
 
+  /**
+   * Gets the list of underReplicated and unClosed containers on a decommissioning node.
+   *
+   * @param dn - Datanode detail
+   * @return Lists of underReplicated and unClosed containers
+   */
+  Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn) throws IOException;
+
   /**
    * Queries a list of Node Statuses. Passing a null for either opState or
    * state acts like a wildcard returning all nodes in that state.
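For orientation before the plumbing below: a minimal usage sketch of the new interface method, assuming an already-constructed ScmClient and the DatanodeDetails of a decommissioning node. The helper class itself is hypothetical and not part of this patch; the map keys follow the "UnderReplicated"/"UnClosed" convention established in DatanodeAdminMonitorImpl further down.

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.client.ScmClient;
import org.apache.hadoop.hdds.scm.container.ContainerID;

final class DecomProgressPrinter {
  // Hypothetical helper: summarise why a decommissioning node is still blocked.
  static void print(ScmClient client, DatanodeDetails dn) throws IOException {
    Map<String, List<ContainerID>> byState = client.getContainersOnDecomNode(dn);
    for (Map.Entry<String, List<ContainerID>> e : byState.entrySet()) {
      // Keys are "UnderReplicated" and "UnClosed"; values are the blocking containers.
      System.out.println(e.getKey() + ": " + e.getValue());
    }
  }
}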
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index eb3f419e48d1..84a0fa4886ce 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.client.ReplicatedReplicationConfig;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.GetScmInfoResponseProto;
@@ -55,6 +56,9 @@
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainersOnDecomNodeProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerWithPipelineBatchRequestProto;
@@ -459,6 +463,23 @@ public void deleteContainer(long containerID)
   }
 
+  @Override
+  public Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+    GetContainersOnDecomNodeRequestProto request = GetContainersOnDecomNodeRequestProto.newBuilder()
+        .setDatanodeDetails(dn.getProtoBufMessage()).build();
+    GetContainersOnDecomNodeResponseProto response = submitRequest(Type.GetContainersOnDecomNode,
+        builder -> builder.setGetContainersOnDecomNodeRequest(request)).getGetContainersOnDecomNodeResponse();
+    Map<String, List<ContainerID>> containerMap = new HashMap<>();
+    for (ContainersOnDecomNodeProto containersProto : response.getContainersOnDecomNodeList()) {
+      List<ContainerID> containerIds = new ArrayList<>();
+      for (HddsProtos.ContainerID id : containersProto.getIdList()) {
+        containerIds.add(ContainerID.getFromProtobuf(id));
+      }
+      containerMap.put(containersProto.getName(), containerIds);
+    }
+    return containerMap;
+  }
+
   /**
    * Queries a list of Nodes based on their operational state or health state.
    * Passing a null for either value acts as a wildcard for that state.
diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
index 6cfddcc2f6c4..6adca817ed1d 100644
--- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
+++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
@@ -82,6 +82,7 @@ message ScmContainerLocationRequest {
   optional GetFailedDeletedBlocksTxnRequestProto getFailedDeletedBlocksTxnRequest = 43;
   optional DecommissionScmRequestProto decommissionScmRequest = 44;
   optional SingleNodeQueryRequestProto singleNodeQueryRequest = 45;
+  optional GetContainersOnDecomNodeRequestProto getContainersOnDecomNodeRequest = 46;
 }
 
 message ScmContainerLocationResponse {
@@ -135,6 +136,7 @@ message ScmContainerLocationResponse {
   optional GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxnResponse = 43;
   optional DecommissionScmResponseProto decommissionScmResponse = 44;
   optional SingleNodeQueryResponseProto singleNodeQueryResponse = 45;
+  optional GetContainersOnDecomNodeResponseProto getContainersOnDecomNodeResponse = 46;
 
   enum Status {
     OK = 1;
@@ -187,6 +189,7 @@ enum Type {
   GetFailedDeletedBlocksTransaction = 39;
   DecommissionScm = 40;
   SingleNodeQuery = 41;
+  GetContainersOnDecomNode = 42;
 }
 
 /**
@@ -602,6 +605,19 @@ message DecommissionScmResponseProto {
   optional string errorMsg = 2;
 }
 
+message GetContainersOnDecomNodeRequestProto {
+  required DatanodeDetailsProto datanodeDetails = 1;
+}
+
+message ContainersOnDecomNodeProto {
+  required string name = 1;
+  repeated ContainerID id = 2;
+}
+
+message GetContainersOnDecomNodeResponseProto {
+  repeated ContainersOnDecomNodeProto containersOnDecomNode = 1;
+}
+
 /**
 * Protocol used from an HDFS node to StorageContainerManager. See the request
 * and response messages for details of the RPC calls.
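To make the new wire format concrete, a small sketch of building a response with the classes protoc generates from the messages above (standard protobuf-Java builder names on the existing StorageContainerLocationProtocolProtos/HddsProtos bindings; the bucket name and container id value are illustrative):

import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainersOnDecomNodeProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;

final class DecomResponseExample {
  static GetContainersOnDecomNodeResponseProto build() {
    // One named bucket ("UnderReplicated") carrying a single container id.
    ContainersOnDecomNodeProto bucket = ContainersOnDecomNodeProto.newBuilder()
        .setName("UnderReplicated")
        .addId(HddsProtos.ContainerID.newBuilder().setId(1L).build())
        .build();
    // The response is simply the repeated list of such named buckets.
    return GetContainersOnDecomNodeResponseProto.newBuilder()
        .addContainersOnDecomNode(bucket)
        .build();
  }
}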
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
index e0b4c3ce543c..fbfbb49c2521 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitor.java
@@ -18,7 +18,11 @@
 package org.apache.hadoop.hdds.scm.node;
 
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 /**
@@ -31,4 +35,6 @@ public interface DatanodeAdminMonitor extends Runnable {
   void stopMonitoring(DatanodeDetails dn);
   Set<DatanodeAdminMonitorImpl.TrackedNode> getTrackedNodes();
   void setMetrics(NodeDecommissionMetrics metrics);
+  Map<String, List<ContainerID>> getContainersReplicatedOnNode(DatanodeDetails dn)
+      throws NodeNotFoundException;
 }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
index 51c6d12dea92..d7975ff1e58e 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeAdminMonitorImpl.java
@@ -96,8 +96,8 @@ public class DatanodeAdminMonitorImpl implements DatanodeAdminMonitor {
 
   public static final class TrackedNode {
     private DatanodeDetails datanodeDetails;
-
     private long startTime = 0L;
+    private Map<String, List<ContainerID>> containersReplicatedOnNode = new ConcurrentHashMap<>();
 
     public TrackedNode(DatanodeDetails datanodeDetails, long startTime) {
       this.datanodeDetails = datanodeDetails;
@@ -122,6 +122,15 @@ public DatanodeDetails getDatanodeDetails() {
     public long getStartTime() {
       return startTime;
     }
+
+    public Map<String, List<ContainerID>> getContainersReplicatedOnNode() {
+      return containersReplicatedOnNode;
+    }
+
+    public void setContainersReplicatedOnNode(List<ContainerID> underReplicated, List<ContainerID> unClosed) {
+      this.containersReplicatedOnNode.put("UnderReplicated", Collections.unmodifiableList(underReplicated));
+      this.containersReplicatedOnNode.put("UnClosed", Collections.unmodifiableList(unClosed));
+    }
   }
 
   private Map<String, ContainerStateInWorkflow> containerStateByHost;
@@ -423,9 +432,7 @@ private boolean checkContainersReplicatedOnNode(TrackedNode dn)
       boolean isHealthy = replicaSet.isHealthyEnoughForOffline();
       if (!isHealthy) {
-        if (LOG.isDebugEnabled()) {
-          unClosedIDs.add(cid);
-        }
+        unClosedIDs.add(cid);
         if (unclosed < containerDetailsLoggingLimit || LOG.isDebugEnabled()) {
           LOG.info("Unclosed Container {} {}; {}", cid, replicaSet,
               replicaDetails(replicaSet.getReplicas()));
         }
@@ -448,20 +455,18 @@ private boolean checkContainersReplicatedOnNode(TrackedNode dn)
           replicationManager.checkContainerStatus(replicaSet.getContainer(), report);
           replicatedOK = report.getStat(ReplicationManagerReport.HealthState.UNDER_REPLICATED) == 0;
         }
-
         if (replicatedOK) {
           sufficientlyReplicated++;
         } else {
-          if (LOG.isDebugEnabled()) {
-            underReplicatedIDs.add(cid);
-          }
+          underReplicatedIDs.add(cid);
           if (underReplicated < containerDetailsLoggingLimit || LOG.isDebugEnabled()) {
             LOG.info("Under Replicated Container {} {}; {}", cid, replicaSet, replicaDetails(replicaSet.getReplicas()));
           }
           underReplicated++;
         }
       } catch (ContainerNotFoundException e) {
-        LOG.warn("ContainerID {} present in node list for {} but not found in containerManager", cid, dn);
+        LOG.warn("ContainerID {} present in node list for {} but not found in containerManager", cid,
+            dn.getDatanodeDetails());
       }
     }
     LOG.info("{} has {} sufficientlyReplicated, {} deleting, {} " +
@@ -485,9 +490,21 @@ private boolean checkContainersReplicatedOnNode(TrackedNode dn)
           unclosed, unClosedIDs.stream().map(
               Object::toString).collect(Collectors.joining(", ")));
     }
+    dn.setContainersReplicatedOnNode(underReplicatedIDs, unClosedIDs);
     return underReplicated == 0 && unclosed == 0;
   }
 
+  public Map<String, List<ContainerID>> getContainersReplicatedOnNode(DatanodeDetails dn) {
+    Iterator<TrackedNode> iterator = trackedNodes.iterator();
+    while (iterator.hasNext()) {
+      TrackedNode trackedNode = iterator.next();
+      if (trackedNode.equals(new TrackedNode(dn, 0L))) {
+        return trackedNode.getContainersReplicatedOnNode();
+      }
+    }
+    return new HashMap<>();
+  }
+
   private String replicaDetails(Collection<ContainerReplica> replicas) {
     StringBuilder sb = new StringBuilder();
     sb.append("Replicas{");
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
index c98cc63c4668..38e59b89e767 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/NodeDecommissionManager.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState;
 import org.apache.hadoop.hdds.scm.DatanodeAdminError;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
 import org.apache.hadoop.hdds.scm.ha.SCMContext;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
@@ -40,6 +41,7 @@
 import java.util.Comparator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
@@ -292,6 +294,11 @@ public NodeDecommissionManager(OzoneConfiguration config, NodeManager nm,
         TimeUnit.SECONDS);
   }
 
+  public Map<String, List<ContainerID>> getContainersReplicatedOnNode(DatanodeDetails dn)
+      throws NodeNotFoundException {
+    return getMonitor().getContainersReplicatedOnNode(dn);
+  }
+
   @VisibleForTesting
   public DatanodeAdminMonitor getMonitor() {
     return monitor;
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
index 6d47a78a7d77..f402b9309fe4 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
@@ -24,6 +24,7 @@
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
 import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.TransferLeadershipResponseProto;
@@ -51,6 +52,9 @@
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ForceExitSafeModeResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerReplicasResponseProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainersOnDecomNodeProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainerTokenRequestProto;
@@ -614,6 +618,12 @@ public ScmContainerLocationResponse processRequest(
           .setDecommissionNodesResponse(decommissionNodes(
               request.getDecommissionNodesRequest()))
           .build();
+    case GetContainersOnDecomNode:
+      return ScmContainerLocationResponse.newBuilder()
+          .setCmdType(request.getCmdType())
+          .setStatus(Status.OK)
+          .setGetContainersOnDecomNodeResponse(getContainersOnDecomNode(request.getGetContainersOnDecomNodeRequest()))
+          .build();
     case RecommissionNodes:
       return ScmContainerLocationResponse.newBuilder()
           .setCmdType(request.getCmdType())
@@ -1160,6 +1170,22 @@ public DecommissionNodesResponseProto decommissionNodes(
     return response.build();
   }
 
+  public GetContainersOnDecomNodeResponseProto getContainersOnDecomNode(GetContainersOnDecomNodeRequestProto request)
+      throws IOException {
+    Map<String, List<ContainerID>> containerMap = impl.getContainersOnDecomNode(
+        DatanodeDetails.getFromProtoBuf(request.getDatanodeDetails()));
+    List<ContainersOnDecomNodeProto> containersProtoList = new ArrayList<>();
+    for (Map.Entry<String, List<ContainerID>> containerList : containerMap.entrySet()) {
+      List<HddsProtos.ContainerID> containerIdsProto = new ArrayList<>();
+      for (ContainerID id : containerList.getValue()) {
+        containerIdsProto.add(id.getProtobuf());
+      }
+      containersProtoList.add(ContainersOnDecomNodeProto.newBuilder().setName(containerList.getKey())
+          .addAllId(containerIdsProto).build());
+    }
+    return GetContainersOnDecomNodeResponseProto.newBuilder().addAllContainersOnDecomNode(containersProtoList).build();
+  }
+
   public RecommissionNodesResponseProto recommissionNodes(
       RecommissionNodesRequestProto request) throws IOException {
     List<DatanodeAdminError> errors =
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
index ac92ea893dbb..13bef8590b79 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
@@ -588,6 +588,15 @@ public void deleteContainer(long containerID) throws IOException {
     }
   }
 
+  @Override
+  public Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+    try {
+      return scm.getScmDecommissionManager().getContainersReplicatedOnNode(dn);
+    } catch (NodeNotFoundException e) {
+      throw new IOException("Failed to get containers list. Unable to find required node", e);
+    }
+  }
+
   @Override
   public List<HddsProtos.Node> queryNode(
       HddsProtos.NodeOperationalState opState, HddsProtos.NodeState state,
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
index 523d4226cb43..5164eaafc281 100644
--- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
+++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java
@@ -837,6 +837,50 @@ public void testCancelledNodesMovedToInService()
         nodeManager.getNodeStatus(dn1).getOperationalState());
   }
 
+  @Test
+  public void testContainersReplicatedOnDecomDnAPI()
+      throws NodeNotFoundException, ContainerNotFoundException {
+    conf.setBoolean("hdds.scm.replication.enable.legacy", false);
+
+    DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails();
+    nodeManager.register(dn1,
+        new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+            HddsProtos.NodeState.HEALTHY));
+
+    Set<ContainerID> containers = new HashSet<>();
+    containers.add(ContainerID.valueOf(1));
+    containers.add(ContainerID.valueOf(2));
+    nodeManager.setContainers(dn1, containers);
+    DatanodeAdminMonitorTestUtil
+        .mockGetContainerReplicaCount(repManager,
+            true,
+            HddsProtos.LifeCycleState.CLOSED,
+            DECOMMISSIONING,
+            IN_SERVICE,
+            IN_SERVICE);
+
+    monitor.startMonitoring(dn1);
+    monitor.run();
+    assertEquals(1, monitor.getTrackedNodeCount());
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+        nodeManager.getNodeStatus(dn1).getOperationalState());
+    assertEquals(2, monitor.getContainersReplicatedOnNode(dn1).get("UnderReplicated").size());
+    assertEquals(0, monitor.getContainersReplicatedOnNode(dn1).get("UnClosed").size());
+
+    DatanodeAdminMonitorTestUtil
+        .mockGetContainerReplicaCount(repManager,
+            true,
+            HddsProtos.LifeCycleState.OPEN,
+            IN_SERVICE);
+
+    monitor.run();
+    assertEquals(1, monitor.getTrackedNodeCount());
+    assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
+        nodeManager.getNodeStatus(dn1).getOperationalState());
+    assertEquals(0, monitor.getContainersReplicatedOnNode(dn1).get("UnderReplicated").size());
+    assertEquals(2, monitor.getContainersReplicatedOnNode(dn1).get("UnClosed").size());
+  }
+
   /**
    * Generate a set of ContainerID, starting from an ID of zero up to the given
    * count minus 1.
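One subtlety worth flagging in DatanodeAdminMonitorImpl.getContainersReplicatedOnNode above: probing trackedNodes with new TrackedNode(dn, 0L) only works if TrackedNode equality is keyed on the wrapped DatanodeDetails and ignores startTime. That contract is not visible in this diff; a sketch of the assumed shape of the class members:

import java.util.Objects;

// Assumed (not shown in this patch): equality compares only the datanode, so a
// probe object built with an arbitrary start time still matches the tracked entry.
@Override
public boolean equals(Object o) {
  return o instanceof TrackedNode
      && Objects.equals(datanodeDetails, ((TrackedNode) o).datanodeDetails);
}

@Override
public int hashCode() {
  return Objects.hashCode(datanodeDetails);
}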
diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
index 1daffbb9b940..d07e696e7ef0 100644
--- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
+++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
@@ -216,6 +216,11 @@ public ContainerWithPipeline createContainer(HddsProtos.ReplicationType type,
     }
   }
 
+  @Override
+  public Map<String, List<ContainerID>> getContainersOnDecomNode(DatanodeDetails dn) throws IOException {
+    return storageContainerLocationClient.getContainersOnDecomNode(dn);
+  }
+
   @Override
   public List<HddsProtos.Node> queryNode(
       HddsProtos.NodeOperationalState opState,
diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
index bbf1d8407605..b53632f8eec5 100644
--- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
+++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/datanode/DecommissionStatusSubCommand.java
@@ -23,10 +23,12 @@
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
 import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
 import picocli.CommandLine;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -83,6 +85,8 @@ public void execute(ScmClient scmClient) throws IOException {
       DatanodeDetails datanode = DatanodeDetails.getFromProtoBuf(
           node.getNodeID());
       printDetails(datanode);
+      Map<String, List<ContainerID>> containers = scmClient.getContainersOnDecomNode(datanode);
+      System.out.println(containers);
     }
   }
 
   private void printDetails(DatanodeDetails datanode) {
diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
index 902ee5e7a8d1..ed05d6f0e9d1 100644
--- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
+++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestDecommissionStatusSubCommand.java
@@ -17,8 +17,10 @@
  */
 package org.apache.hadoop.hdds.scm.cli.datanode;
 
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.client.ScmClient;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -29,7 +31,9 @@
 import java.io.UnsupportedEncodingException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -55,6 +59,7 @@ public class TestDecommissionStatusSubCommand {
   private final PrintStream originalErr = System.err;
   private DecommissionStatusSubCommand cmd;
   private List<HddsProtos.Node> nodes = getNodeDetails(2);
+  private Map<String, List<ContainerID>> containerOnDecom = getContainersOnDecomNodes();
 
   @BeforeEach
   public void setup() throws UnsupportedEncodingException {
@@ -74,6 +79,7 @@ public void testSuccessWhenDecommissionStatus() throws IOException {
     ScmClient scmClient = mock(ScmClient.class);
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+    when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
 
     cmd.execute(scmClient);
     Pattern p = Pattern.compile("Decommission\\sStatus:\\s" +
@@ -85,9 +91,15 @@ public void testSuccessWhenDecommissionStatus() throws IOException {
     p = Pattern.compile("Datanode:\\s.*host0\\)");
     m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertTrue(m.find());
+    p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertTrue(m.find());
     p = Pattern.compile("Datanode:\\s.*host1\\)");
     m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertTrue(m.find());
+    p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertTrue(m.find());
   }
 
   @Test
@@ -96,6 +108,7 @@ public void testNoNodesWhenDecommissionStatus() throws IOException {
     // No nodes in decommissioning. No error is printed
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenReturn(new ArrayList<>());
+    when(scmClient.getContainersOnDecomNode(any())).thenReturn(new HashMap<>());
 
     cmd.execute(scmClient);
     Pattern p = Pattern.compile("Decommission\\sStatus:\\s" +
@@ -117,6 +130,7 @@ public void testIdOptionDecommissionStatusSuccess() throws IOException {
     ScmClient scmClient = mock(ScmClient.class);
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+    when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
 
     CommandLine c = new CommandLine(cmd);
     c.parseArgs("--id", nodes.get(0).getNodeID().getUuid());
@@ -125,11 +139,17 @@ public void testIdOptionDecommissionStatusSuccess() throws IOException {
     Pattern p = Pattern.compile("Datanode:\\s.*host0\\)", Pattern.MULTILINE);
     Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertTrue(m.find());
+    p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertTrue(m.find());
 
     // as uuid of only host0 is passed, host1 should NOT be displayed
     p = Pattern.compile("Datanode:\\s.*host1.\\)", Pattern.MULTILINE);
     m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertFalse(m.find());
+    p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertFalse(m.find());
   }
 
   @Test
@@ -137,6 +157,10 @@ public void testIdOptionDecommissionStatusFail() throws IOException {
     ScmClient scmClient = mock(ScmClient.class);
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenAnswer(invocation -> nodes.subList(0, 1)); // host0 decommissioning
+    when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(0).getNodeID())))
+        .thenReturn(containerOnDecom);
+    when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(1).getNodeID())))
+        .thenReturn(new HashMap<>());
 
     CommandLine c = new CommandLine(cmd);
     c.parseArgs("--id", nodes.get(1).getNodeID().getUuid());
@@ -161,6 +185,7 @@ public void testIpOptionDecommissionStatusSuccess() throws IOException {
     ScmClient scmClient = mock(ScmClient.class);
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenAnswer(invocation -> nodes); // 2 nodes decommissioning
+    when(scmClient.getContainersOnDecomNode(any())).thenReturn(containerOnDecom);
 
     CommandLine c = new CommandLine(cmd);
     c.parseArgs("--ip", nodes.get(1).getNodeID().getIpAddress());
@@ -169,11 +194,17 @@ public void testIpOptionDecommissionStatusSuccess() throws IOException {
     Pattern p = Pattern.compile("Datanode:\\s.*host1\\)", Pattern.MULTILINE);
     Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertTrue(m.find());
+    p = Pattern.compile("host1.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertTrue(m.find());
 
     // as IpAddress of only host1 is passed, host0 should NOT be displayed
     p = Pattern.compile("Datanode:\\s.*host0.\\)", Pattern.MULTILINE);
     m = p.matcher(outContent.toString(DEFAULT_ENCODING));
     assertFalse(m.find());
+    p = Pattern.compile("host0.*[\r\n].*UnderReplicated.*UnClosed", Pattern.MULTILINE);
+    m = p.matcher(outContent.toString(DEFAULT_ENCODING));
+    assertFalse(m.find());
   }
 
   @Test
@@ -181,6 +212,10 @@ public void testIpOptionDecommissionStatusFail() throws IOException {
     ScmClient scmClient = mock(ScmClient.class);
     when(scmClient.queryNode(any(), any(), any(), any()))
         .thenAnswer(invocation -> nodes.subList(0, 1)); // host0 decommissioning
+    when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(0).getNodeID())))
+        .thenReturn(containerOnDecom);
+    when(scmClient.getContainersOnDecomNode(DatanodeDetails.getFromProtoBuf(nodes.get(1).getNodeID())))
+        .thenReturn(new HashMap<>());
 
     CommandLine c = new CommandLine(cmd);
     c.parseArgs("--ip", nodes.get(1).getNodeID().getIpAddress());
@@ -225,4 +260,19 @@ private List<HddsProtos.Node> getNodeDetails(int n) {
     return nodesList;
   }
 
+  private Map<String, List<ContainerID>> getContainersOnDecomNodes() {
+    Map<String, List<ContainerID>> containerMap = new HashMap<>();
+    List<ContainerID> underReplicated = new ArrayList<>();
+    underReplicated.add(new ContainerID(1L));
+    underReplicated.add(new ContainerID(2L));
+    underReplicated.add(new ContainerID(3L));
+    containerMap.put("UnderReplicated", underReplicated);
+    List<ContainerID> unclosed = new ArrayList<>();
+    unclosed.add(new ContainerID(10L));
+    unclosed.add(new ContainerID(11L));
+    unclosed.add(new ContainerID(12L));
+    containerMap.put("UnClosed", unclosed);
+    return containerMap;
+  }
+
 }
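For reference: with the mocked map above, the subcommand's new System.out.println(containers) emits the map's default toString on the line following each datanode's details, which is what the host0/host1 regexes assert. Assuming ContainerID renders as "#<id>" and allowing for HashMap key ordering, the relevant output lines would resemble:

Datanode: <uuid> (.../host0)
{UnderReplicated=[#1, #2, #3], UnClosed=[#10, #11, #12]}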