diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java index 67ee84512f1d..657d94ef67ca 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; import org.apache.hadoop.hdds.scm.DatanodeAdminError; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -476,4 +477,12 @@ DecommissionScmResponseProto decommissionScm( * @throws IOException On error */ void reconcileContainer(long containerID) throws IOException; + + /** + * Gets volume information based on query conditions. + * + * @return Volume Information List. + * @throws IOException On error. 
+ */ + GetVolumeInfosResponseProto getVolumeInfos() throws IOException; } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java index 1f0f8cd8b066..bdfe83e9cd0f 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -499,4 +500,13 @@ DecommissionScmResponseProto decommissionScm( * @throws IOException On error */ void reconcileContainer(long containerID) throws IOException; + + /** + * Retrieves volume information based on the specified query parameters. + * + * @return Volume Information List. + * @throws IOException + * I/O exceptions that may occur during the process of querying the volume. 
+ */ + StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto getVolumeInfos() throws IOException; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java index 7072d6090baa..f7600de6cc3d 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java @@ -86,6 +86,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineResponseProto; @@ -1238,4 +1240,16 @@ public void reconcileContainer(long containerID) throws IOException { // TODO check error handling. submitRequest(Type.ReconcileContainer, builder -> builder.setReconcileContainerRequest(request)); } + + @Override + public GetVolumeInfosResponseProto getVolumeInfos() throws IOException { + // Prepare parameters. 
+ GetVolumeInfosRequestProto.Builder requestBuilder = + GetVolumeInfosRequestProto.newBuilder(); + // Submit request. + GetVolumeInfosResponseProto response = submitRequest(Type.GetVolumeFailureInfos, + builder -> builder.setGetVolumeInfosRequest(requestBuilder.build())). + getGetVolumeInfosResponse(); + return response; + } } diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index cadff023a061..c1a38848c0cf 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -86,6 +86,7 @@ message ScmContainerLocationRequest { optional GetMetricsRequestProto getMetricsRequest = 47; optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48; optional ReconcileContainerRequestProto reconcileContainerRequest = 49; + optional GetVolumeInfosRequestProto getVolumeInfosRequest = 50; } message ScmContainerLocationResponse { @@ -143,6 +144,7 @@ message ScmContainerLocationResponse { optional GetMetricsResponseProto getMetricsResponse = 47; optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48; optional ReconcileContainerResponseProto reconcileContainerResponse = 49; + optional GetVolumeInfosResponseProto getVolumeInfosResponse = 50; enum Status { OK = 1; @@ -199,6 +201,7 @@ enum Type { GetMetrics = 43; GetContainerBalancerStatusInfo = 44; ReconcileContainer = 45; + GetVolumeFailureInfos = 46; } /** @@ -685,6 +688,13 @@ message ReconcileContainerRequestProto { message ReconcileContainerResponseProto { } +message GetVolumeInfosRequestProto { +} + +message GetVolumeInfosResponseProto { + repeated VolumeInfoProto volumeInfos = 1; +} + /** * Protocol used from an HDFS node to StorageContainerManager. See the request * and response messages for details of the RPC calls. 
diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto index ef76205d91f7..3da7fb2e5057 100644 --- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto +++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto @@ -304,6 +304,14 @@ message RemoveScmResponseProto { optional string scmId = 2; } +message VolumeInfoProto { + optional DatanodeIDProto dataNodeId = 1; + optional string hostName = 2; + optional string volumeName = 3; + optional bool failed = 4; + optional int64 capacity = 5; +} + enum ReplicationType { RATIS = 1; STAND_ALONE = 2; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java index 8f8753a4991b..c107100b4c75 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/node/DatanodeInfo.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto; import com.google.common.annotations.VisibleForTesting; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -27,6 +28,7 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.VolumeInfoProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandQueueReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; @@ -49,7 +51,7 @@ public class DatanodeInfo extends DatanodeDetails { private volatile long lastHeartbeatTime; 
private long lastStatsUpdatedTime; private int failedVolumeCount; - + private List volumeInfos; private List storageReports; private List metadataStorageReports; private LayoutVersionProto lastKnownLayoutVersion; @@ -72,6 +74,7 @@ public DatanodeInfo(DatanodeDetails datanodeDetails, NodeStatus nodeStatus, layoutInfo != null ? layoutInfo.getMetadataLayoutVersion() : 0, layoutInfo != null ? layoutInfo.getSoftwareLayoutVersion() : 0); this.storageReports = Collections.emptyList(); + this.volumeInfos = Collections.emptyList(); this.nodeStatus = nodeStatus; this.metadataStorageReports = Collections.emptyList(); this.commandCounts = new HashMap<>(); @@ -155,16 +158,57 @@ public void updateStorageReports(List reports) { .filter(e -> e.hasFailed() && e.getFailed()) .count(); + // We choose to update the status of failed disks during the heartbeat, + // so we can directly retrieve it when querying. + List volumeInfoLists = new ArrayList<>(); + for (StorageReportProto report : reports) { + + String storageLocation = report.getStorageLocation(); + long capacity = report.getCapacity(); + + String hostName = getHostName(); + + boolean failed = false; + if (report.hasFailed() && report.getFailed()) { + failed = true; + } + + VolumeInfoProto volumeFailure = + VolumeInfoProto.newBuilder(). + setDataNodeId(getID().toProto()). + setHostName(hostName). + setVolumeName(storageLocation). + setCapacity(capacity). + setFailed(failed). + build(); + volumeInfoLists.add(volumeFailure); + } + try { lock.writeLock().lock(); lastStatsUpdatedTime = Time.monotonicNow(); failedVolumeCount = failedCount; storageReports = reports; + volumeInfos = Collections.unmodifiableList(volumeInfoLists); } finally { lock.writeLock().unlock(); } } + /** + * Get all volume information. + * + * @return VolumeInfo List. 
+ */ + public List getVolumeInfos() { + try { + lock.readLock().lock(); + return volumeInfos; + } finally { + lock.readLock().unlock(); + } + } + /** * Updates the datanode metadata storage reports. * diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 8ce5ae44ab6b..d0c44a473424 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -93,6 +93,8 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesResponseProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosRequestProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeRequestProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto; @@ -739,6 +741,14 @@ public ScmContainerLocationResponse processRequest( .setStatus(Status.OK) .setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest())) .build(); + case GetVolumeFailureInfos: + GetVolumeInfosRequestProto 
getVolumeInfosRequest = request.getGetVolumeInfosRequest(); + GetVolumeInfosResponseProto getVolumeInfosResponse = getVolumeInfos(getVolumeInfosRequest); + return ScmContainerLocationResponse.newBuilder() + .setCmdType(request.getCmdType()) + .setStatus(Status.OK) + .setGetVolumeInfosResponse(getVolumeInfosResponse) + .build(); default: throw new IllegalArgumentException( "Unknown command type: " + request.getCmdType()); @@ -1359,6 +1369,22 @@ public GetMetricsResponseProto getMetrics(GetMetricsRequestProto request) throws return GetMetricsResponseProto.newBuilder().setMetricsJson(impl.getMetrics(request.getQuery())).build(); } + /** + * Gets volume information based on query conditions. + * + * @param request The request object containing the parameters to + * fetch volume information (GetVolumeInfosRequestProto). + * @return A response object containing the volume information + * (GetVolumeInfosResponseProto). + * @throws IOException + * If an input/output exception occurs while processing the request. 
+ */ + public GetVolumeInfosResponseProto getVolumeInfos( + GetVolumeInfosRequestProto request) throws IOException { + // Invoke method and return result + return impl.getVolumeInfos(); + } + public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequestProto request) throws IOException { impl.reconcileContainer(request.getContainerID()); return ReconcileContainerResponseProto.getDefaultInstance(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 03774c48699c..726fd25b37e4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -49,6 +49,7 @@ import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdds.client.ReplicationConfig; @@ -58,11 +59,13 @@ import org.apache.hadoop.hdds.protocol.DatanodeID; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos.VolumeInfoProto; import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto.Builder; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolPB; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolServerSideTranslatorPB; @@ -90,7 +93,9 @@ import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes; import org.apache.hadoop.hdds.scm.ha.SCMRatisServer; import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; +import org.apache.hadoop.hdds.scm.node.DatanodeInfo; import org.apache.hadoop.hdds.scm.node.DatanodeUsageInfo; +import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.node.NodeStatus; import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -1679,4 +1684,34 @@ public void reconcileContainer(long longContainerID) throws IOException { throw ex; } } + + @Override + public StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto getVolumeInfos() throws IOException { + GetVolumeInfosResponseProto.Builder getVolumeInfosResponseBuilder = + GetVolumeInfosResponseProto.newBuilder(); + NodeManager scmNodeManager = scm.getScmNodeManager(); + List allNodes = scmNodeManager.getAllNodes(); + // If the filtered list is empty, we will return directly. + if (CollectionUtils.isEmpty(allNodes)) { + return getVolumeInfosResponseBuilder.build(); + } + // We convert it to a list of VolumeInfoProto. 
+ List<VolumeInfoProto> volumeInfos = convertToVolumeInfos(allNodes); + if (CollectionUtils.isNotEmpty(volumeInfos)) { + getVolumeInfosResponseBuilder.addAllVolumeInfos(volumeInfos); + } + return getVolumeInfosResponseBuilder.build(); + } + + private List<VolumeInfoProto> convertToVolumeInfos(List<DatanodeDetails> allNodes) { + List<VolumeInfoProto> result = new ArrayList<>(); + for (DatanodeDetails datanode : allNodes) { + DatanodeInfo detail = (DatanodeInfo) datanode; + List<VolumeInfoProto> volumeInfos = detail.getVolumeInfos(); + if (CollectionUtils.isNotEmpty(volumeInfos)) { + result.addAll(volumeInfos); + } + } + return result; + } } diff --git a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java index 2d5db1dfaf36..1644ccc3de39 100644 --- a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java +++ b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; @@ -607,4 +608,9 @@ public String getMetrics(String query) throws IOException { public void reconcileContainer(long id) throws IOException { storageContainerLocationClient.reconcileContainer(id); } + + @Override + public 
StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto getVolumeInfos() throws IOException { + return storageContainerLocationClient.getVolumeInfos(); + } }