Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.container.ContainerID;
Expand Down Expand Up @@ -476,4 +477,12 @@ DecommissionScmResponseProto decommissionScm(
* @throws IOException On error
*/
void reconcileContainer(long containerID) throws IOException;

/**
 * Returns volume information for datanode volumes known to SCM.
 *
 * @return Volume Information List.
 * @throws IOException On error.
 */
GetVolumeInfosResponseProto getVolumeInfos() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
Expand Down Expand Up @@ -499,4 +500,13 @@ DecommissionScmResponseProto decommissionScm(
* @throws IOException On error
*/
void reconcileContainer(long containerID) throws IOException;

/**
 * Retrieves volume information for datanode volumes known to SCM.
 *
 * @return Volume Information List.
 * @throws IOException if an I/O error occurs while querying the volumes.
 */
StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto getVolumeInfos() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineResponseProto;
Expand Down Expand Up @@ -1238,4 +1240,16 @@ public void reconcileContainer(long containerID) throws IOException {
// TODO check error handling.
submitRequest(Type.ReconcileContainer, builder -> builder.setReconcileContainerRequest(request));
}

@Override
public GetVolumeInfosResponseProto getVolumeInfos() throws IOException {
  // The request currently carries no parameters, so an empty message is sent.
  GetVolumeInfosRequestProto request = GetVolumeInfosRequestProto.newBuilder().build();
  // Submit the RPC and unwrap the volume-info payload from the envelope.
  return submitRequest(Type.GetVolumeFailureInfos,
      builder -> builder.setGetVolumeInfosRequest(request))
      .getGetVolumeInfosResponse();
}
}
10 changes: 10 additions & 0 deletions hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ message ScmContainerLocationRequest {
optional GetMetricsRequestProto getMetricsRequest = 47;
optional ContainerBalancerStatusInfoRequestProto containerBalancerStatusInfoRequest = 48;
optional ReconcileContainerRequestProto reconcileContainerRequest = 49;
optional GetVolumeInfosRequestProto getVolumeInfosRequest = 50;
}

message ScmContainerLocationResponse {
Expand Down Expand Up @@ -143,6 +144,7 @@ message ScmContainerLocationResponse {
optional GetMetricsResponseProto getMetricsResponse = 47;
optional ContainerBalancerStatusInfoResponseProto containerBalancerStatusInfoResponse = 48;
optional ReconcileContainerResponseProto reconcileContainerResponse = 49;
optional GetVolumeInfosResponseProto getVolumeInfosResponse = 50;

enum Status {
OK = 1;
Expand Down Expand Up @@ -199,6 +201,7 @@ enum Type {
GetMetrics = 43;
GetContainerBalancerStatusInfo = 44;
ReconcileContainer = 45;
GetVolumeFailureInfos = 46;
}

/**
Expand Down Expand Up @@ -685,6 +688,13 @@ message ReconcileContainerRequestProto {
message ReconcileContainerResponseProto {
}

// Request for volume information of all datanodes. Currently carries no
// filter fields; query parameters may be added here later.
message GetVolumeInfosRequestProto {
}

// Response carrying one entry per datanode volume known to SCM.
message GetVolumeInfosResponseProto {
  repeated VolumeInfoProto volumeInfos = 1;
}

/**
* Protocol used from an HDFS node to StorageContainerManager. See the request
* and response messages for details of the RPC calls.
Expand Down
8 changes: 8 additions & 0 deletions hadoop-hdds/interface-client/src/main/proto/hdds.proto
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,14 @@ message RemoveScmResponseProto {
optional string scmId = 2;
}

// Describes a single storage volume on a datanode, including its
// health (failed flag) and raw capacity in bytes.
message VolumeInfoProto {
  optional DatanodeIDProto dataNodeId = 1;   // ID of the owning datanode.
  optional string hostName = 2;              // Hostname of the owning datanode.
  optional string volumeName = 3;            // Storage location/path of the volume.
  optional bool failed = 4;                  // True if the volume has failed.
  optional int64 capacity = 5;               // Volume capacity in bytes.
}

enum ReplicationType {
RATIS = 1;
STAND_ALONE = 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
import static org.apache.hadoop.ozone.container.upgrade.UpgradeUtils.toLayoutVersionProto;

import com.google.common.annotations.VisibleForTesting;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.VolumeInfoProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandQueueReportProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
Expand All @@ -49,7 +51,7 @@ public class DatanodeInfo extends DatanodeDetails {
private volatile long lastHeartbeatTime;
private long lastStatsUpdatedTime;
private int failedVolumeCount;

private List<VolumeInfoProto> volumeInfos;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

volumeInfos is never assigned. I think the intention was to update this variable in updateStorageReports in the block holding writeLock.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for pointing out this issue! I hadn't noticed these details before.

private List<StorageReportProto> storageReports;
private List<MetadataStorageReportProto> metadataStorageReports;
private LayoutVersionProto lastKnownLayoutVersion;
Expand All @@ -72,6 +74,7 @@ public DatanodeInfo(DatanodeDetails datanodeDetails, NodeStatus nodeStatus,
layoutInfo != null ? layoutInfo.getMetadataLayoutVersion() : 0,
layoutInfo != null ? layoutInfo.getSoftwareLayoutVersion() : 0);
this.storageReports = Collections.emptyList();
this.volumeInfos = Collections.emptyList();
this.nodeStatus = nodeStatus;
this.metadataStorageReports = Collections.emptyList();
this.commandCounts = new HashMap<>();
Expand Down Expand Up @@ -155,16 +158,57 @@ public void updateStorageReports(List<StorageReportProto> reports) {
.filter(e -> e.hasFailed() && e.getFailed())
.count();

// We choose to update the status of failed disks during the heartbeat,
// so we can directly retrieve it when querying.
List<VolumeInfoProto> volumeInfoLists = new ArrayList<>();
for (StorageReportProto report : reports) {

String storageLocation = report.getStorageLocation();
long capacity = report.getCapacity();

String hostName = getHostName();

boolean failed = false;
if (report.hasFailed() && report.getFailed()) {
failed = true;
}

VolumeInfoProto volumeFailure =
VolumeInfoProto.newBuilder().
setDataNodeId(getID().toProto()).
setHostName(hostName).
setVolumeName(storageLocation).
setCapacity(capacity).
setFailed(failed).
build();
volumeInfoLists.add(volumeFailure);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please wrap volumeInfoLists in an unmodifiableList after all items are added.

try {
lock.writeLock().lock();
lastStatsUpdatedTime = Time.monotonicNow();
failedVolumeCount = failedCount;
storageReports = reports;
volumeInfos = Collections.unmodifiableList(volumeInfoLists);
} finally {
lock.writeLock().unlock();
}
}

/**
 * Returns the most recently reported volume information for this datanode.
 * The returned list is the unmodifiable snapshot built during the last
 * storage-report update.
 *
 * @return VolumeInfo List (possibly empty, never null).
 */
public List<VolumeInfoProto> getVolumeInfos() {
  // Acquire the lock BEFORE entering try: if lock() itself were to fail,
  // the finally block must not attempt to unlock a lock we never held.
  lock.readLock().lock();
  try {
    return volumeInfos;
  } finally {
    lock.readLock().unlock();
  }
}

/**
* Updates the datanode metadata storage reports.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetSafeModeRuleStatusesResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeRequestProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.InSafeModeResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ListPipelineRequestProto;
Expand Down Expand Up @@ -739,6 +741,14 @@ public ScmContainerLocationResponse processRequest(
.setStatus(Status.OK)
.setReconcileContainerResponse(reconcileContainer(request.getReconcileContainerRequest()))
.build();
case GetVolumeFailureInfos:
GetVolumeInfosRequestProto getVolumeInfosRequest = request.getGetVolumeInfosRequest();
GetVolumeInfosResponseProto getVolumeInfosResponse = getVolumeInfos(getVolumeInfosRequest);
return ScmContainerLocationResponse.newBuilder()
.setCmdType(request.getCmdType())
.setStatus(Status.OK)
.setGetVolumeInfosResponse(getVolumeInfosResponse)
.build();
default:
throw new IllegalArgumentException(
"Unknown command type: " + request.getCmdType());
Expand Down Expand Up @@ -1359,6 +1369,22 @@ public GetMetricsResponseProto getMetrics(GetMetricsRequestProto request) throws
return GetMetricsResponseProto.newBuilder().setMetricsJson(impl.getMetrics(request.getQuery())).build();
}

/**
 * Handles the GetVolumeFailureInfos RPC by delegating to the server
 * implementation, which gathers volume information from all datanodes.
 *
 * @param request The request object; currently carries no query
 *                parameters (GetVolumeInfosRequestProto).
 * @return A response object containing the volume information
 *         (GetVolumeInfosResponseProto).
 * @throws IOException
 *         If an input/output exception occurs while processing the request.
 */
public GetVolumeInfosResponseProto getVolumeInfos(
GetVolumeInfosRequestProto request) throws IOException {
// The request is currently empty, so all volume infos are returned.
return impl.getVolumeInfos();
}

public ReconcileContainerResponseProto reconcileContainer(ReconcileContainerRequestProto request) throws IOException {
impl.reconcileContainer(request.getContainerID());
return ReconcileContainerResponseProto.getDefaultInstance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.hdds.client.ReplicationConfig;
Expand All @@ -58,11 +59,13 @@
import org.apache.hadoop.hdds.protocol.DatanodeID;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.VolumeInfoProto;
import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto.Builder;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolPB;
import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolServerSideTranslatorPB;
Expand Down Expand Up @@ -90,7 +93,9 @@
import org.apache.hadoop.hdds.scm.exceptions.SCMException.ResultCodes;
import org.apache.hadoop.hdds.scm.ha.SCMRatisServer;
import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl;
import org.apache.hadoop.hdds.scm.node.DatanodeInfo;
import org.apache.hadoop.hdds.scm.node.DatanodeUsageInfo;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import org.apache.hadoop.hdds.scm.node.NodeStatus;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
Expand Down Expand Up @@ -1679,4 +1684,34 @@ public void reconcileContainer(long longContainerID) throws IOException {
throw ex;
}
}

/**
 * Gathers volume information from every datanode registered with the SCM
 * node manager.
 *
 * @return response containing one VolumeInfoProto per datanode volume;
 *         empty when no datanodes are registered or none reported volumes.
 * @throws IOException on error while querying the node manager.
 */
@Override
public GetVolumeInfosResponseProto getVolumeInfos() throws IOException {
  // Use the simple name: GetVolumeInfosResponseProto is already imported.
  GetVolumeInfosResponseProto.Builder getVolumeInfosResponseBuilder =
      GetVolumeInfosResponseProto.newBuilder();
  NodeManager scmNodeManager = scm.getScmNodeManager();
  List<? extends DatanodeDetails> allNodes = scmNodeManager.getAllNodes();
  // No registered datanodes: return an empty response directly.
  if (CollectionUtils.isEmpty(allNodes)) {
    return getVolumeInfosResponseBuilder.build();
  }
  // Flatten the per-datanode volume lists into the response.
  List<VolumeInfoProto> volumeInfos = convertToVolumeInfos(allNodes);
  if (CollectionUtils.isNotEmpty(volumeInfos)) {
    getVolumeInfosResponseBuilder.addAllVolumeInfos(volumeInfos);
  }
  return getVolumeInfosResponseBuilder.build();
}

/**
 * Collects the cached volume reports from each datanode.
 *
 * @param allNodes datanodes registered with the node manager.
 * @return flattened list of volume infos; empty when none reported.
 */
private List<VolumeInfoProto> convertToVolumeInfos(List<? extends DatanodeDetails> allNodes) {
  List<VolumeInfoProto> result = new ArrayList<>();
  for (DatanodeDetails datanode : allNodes) {
    // The node manager is expected to hand back DatanodeInfo instances;
    // guard the downcast so an unexpected implementation cannot fail the
    // whole RPC with a ClassCastException.
    if (!(datanode instanceof DatanodeInfo)) {
      continue;
    }
    List<VolumeInfoProto> volumeInfos = ((DatanodeInfo) datanode).getVolumeInfos();
    if (CollectionUtils.isNotEmpty(volumeInfos)) {
      result.addAll(volumeInfos);
    }
  }
  return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
Expand Down Expand Up @@ -607,4 +608,9 @@ public String getMetrics(String query) throws IOException {
public void reconcileContainer(long id) throws IOException {
storageContainerLocationClient.reconcileContainer(id);
}

/**
 * Retrieves volume information by delegating to the underlying SCM
 * storage container location protocol client.
 *
 * @return Volume Information List.
 * @throws IOException if the underlying RPC fails.
 */
@Override
public StorageContainerLocationProtocolProtos.GetVolumeInfosResponseProto getVolumeInfos() throws IOException {
return storageContainerLocationClient.getVolumeInfos();
}
}
Loading