Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ public NetworkTopology getClusterMap() {
"ScmBlockLocationClient must have been initialized already.");
}

/**
 * Performs a forced refresh of the network topology tree information
 * from SCM by delegating to {@code checkAndRefresh}.
 *
 * @param conf configuration handed through to the refresh routine.
 * @return the value returned by {@code checkAndRefresh(conf)}.
 */
public boolean refetchClusterTree(ConfigurationSource conf) {
  final boolean refreshResult = checkAndRefresh(conf);
  return refreshResult;
}

public void start(ConfigurationSource conf) throws IOException {
final InnerNode initialTopology =
scmBlockLocationProtocol.getNetworkTopology();
Expand Down Expand Up @@ -118,7 +124,7 @@ public static Duration parseRefreshDuration(ConfigurationSource conf) {
return Duration.ofMillis(refreshDurationInMs);
}

private synchronized void checkAndRefresh(ConfigurationSource conf) {
private synchronized boolean checkAndRefresh(ConfigurationSource conf) {
InnerNode current = (InnerNode) cache.get().getNode(ROOT);
try {
InnerNode newTopology = scmBlockLocationProtocol.getNetworkTopology();
Expand All @@ -128,10 +134,12 @@ private synchronized void checkAndRefresh(ConfigurationSource conf) {
ScmConfigKeys.OZONE_SCM_NETWORK_TOPOLOGY_SCHEMA_FILE_DEFAULT),
newTopology));
LOG.info("Updated network topology fetched from SCM: {}.", newTopology);
return true;
}
} catch (IOException e) {
throw new UncheckedIOException(
"Error fetching updated network topology from SCM", e);
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ public static boolean isReadOnly(
case SetSafeMode:
case PrintCompactionLogDag:
case GetSnapshotInfo:
case RefetchNetworkTopologyTree:
return true;
case CreateVolume:
case SetVolumeProperty:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1134,4 +1134,12 @@ void setTimes(OmKeyArgs keyArgs, long mtime, long atime)
*/
boolean setSafeMode(SafeModeAction action, boolean isChecked)
throws IOException;

/**
 * API for OM to force fetch the network topology tree information from SCM
 * on demand, without having to rely on the interval configured through
 * ozone.om.network.topology.refresh.duration.
 *
 * @return status of the refetch operation (success/failure).
 * @throws IOException if the refetch request cannot be completed.
 */
boolean refetchNetworkTopologyTree() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RecoverTrashResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RemoveAclRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RemoveAclResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RenameKeyRequest;
Expand Down Expand Up @@ -2563,6 +2565,19 @@ public boolean setSafeMode(SafeModeAction action, boolean isChecked)
return setSafeModeResponse.getResponse();
}

/**
 * Sends a {@code RefetchNetworkTopologyTree} request to the OM and returns
 * the status carried by its response.
 *
 * @return the {@code status} value of the refetch response.
 * @throws IOException if submitting the request or validating the response
 * fails.
 */
@Override
public boolean refetchNetworkTopologyTree() throws IOException {
  // An empty request builder is attached to the OM request.
  final OMRequest request = createOMRequest(Type.RefetchNetworkTopologyTree)
      .setRefetchNetworkTopologyTreeRequest(
          RefetchNetworkTopologyTreeRequest.newBuilder())
      .build();
  // handleError() is applied to the raw response before it is unpacked.
  final RefetchNetworkTopologyTreeResponse refetchResponse =
      handleError(submitRequest(request))
          .getRefetchNetworkTopologyTreeResponse();
  return refetchResponse.getStatus();
}

private SafeMode toProtoBuf(SafeModeAction action) {
switch (action) {
case ENTER:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,20 @@
*/
package org.apache.hadoop.ozone;

import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto;
import org.apache.hadoop.hdds.scm.net.InnerNode;
import org.apache.hadoop.hdds.scm.node.SCMNodeManager;
import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
import org.apache.hadoop.ozone.client.ObjectStore;
import org.apache.hadoop.ozone.client.OzoneClient;
import org.apache.hadoop.ozone.container.upgrade.UpgradeUtils;
import org.apache.hadoop.ozone.om.OzoneManager;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Test;
Expand All @@ -31,10 +42,19 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;

import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.TimeoutException;

import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.getRandomPipelineReports;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.createNodeReport;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.createMetadataStorageReport;
import static org.apache.hadoop.hdds.scm.HddsTestUtils.createStorageReport;
import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails;
import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode.success;

/**
*
Expand All @@ -50,6 +70,11 @@ public class TestGetClusterTreeInformation {
private static MiniOzoneCluster cluster;
private static OzoneConfiguration conf;
private static StorageContainerManager scm;
private static OzoneManager om;
private static SCMNodeManager nodeManager;
private static OzoneClient client;
private static ObjectStore store;
private static OzoneManagerProtocol omClient;

@BeforeAll
public static void init() throws IOException, TimeoutException,
Expand All @@ -62,6 +87,11 @@ public static void init() throws IOException, TimeoutException,
.build();
cluster.waitForClusterToBeReady();
scm = cluster.getStorageContainerManager();
om = cluster.getOzoneManager();
nodeManager = (SCMNodeManager) scm.getScmNodeManager();
client = cluster.newClient();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

client needs to be closed in shutdown().

store = client.getObjectStore();
omClient = store.getClientProxy().getOzoneManagerClient();
}

@AfterAll
Expand All @@ -84,4 +114,46 @@ public void testGetClusterTreeInformation() throws IOException {
InnerNode actualInnerNode = scmBlockLocationClient.getNetworkTopology();
assertEquals(expectedInnerNode, actualInnerNode);
}

/**
 * Verifies that an on-demand refetch makes newly registered datanodes
 * visible in OM's copy of the network topology.
 */
@Test
public void testForceFetchClusterTreeInformation() throws IOException {
  omClient.refetchNetworkTopologyTree();

  // Two rounds: register a datanode, confirm OM does not know it yet,
  // refetch, then confirm OM sees it.
  for (int round = 0; round < 2; round++) {
    final DatanodeDetails addedDatanode = registerDatanode();
    // OM's copy of the network topology does not contain the newly
    // registered DN information at first.
    assertFalse(om.getClusterMap().contains(addedDatanode));

    // The API fetches network topology information from SCM on demand,
    // without having to rely on ozone.om.network.topology.refresh.duration.
    omClient.refetchNetworkTopologyTree();
    // Now OM's copy of the network topology should contain the added DN.
    assertTrue(om.getClusterMap().contains(addedDatanode));
  }
}

/**
 * Registers a randomly generated datanode with the SCM node manager and
 * asserts that the registration succeeded.
 *
 * The storage and metadata reports are built from the same
 * {@code DatanodeDetails} instance that is registered, so the reports
 * describe the node that is actually added.
 *
 * @return the datanode reported back by the registration command.
 */
private DatanodeDetails registerDatanode() {
  DatanodeDetails details = randomDatanodeDetails();

  StorageReportProto storageReport =
      createStorageReport(details.getUuid(), details.getNetworkFullPath(),
          Long.MAX_VALUE);
  MetadataStorageReportProto metadataStorageReport =
      createMetadataStorageReport(details.getNetworkFullPath(),
          Long.MAX_VALUE);

  LayoutVersionProto layout = UpgradeUtils.defaultLayoutVersionProto();
  // Register the node the reports were created for, not a second random one.
  RegisteredCommand cmd = nodeManager.register(details,
      createNodeReport(Arrays.asList(storageReport),
          Arrays.asList(metadataStorageReport)), getRandomPipelineReports(),
      layout);

  assertEquals(success, cmd.getError());
  return cmd.getDatanode();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ enum Type {
ListStatusLight = 129;
GetSnapshotInfo = 130;
RenameSnapshot = 131;
RefetchNetworkTopologyTree = 132;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's omit Tree to make it a bit shorter. (Also in request/response, method and command names.)

}

enum SafeMode {
Expand Down Expand Up @@ -283,6 +284,7 @@ message OMRequest {
optional SetSnapshotPropertyRequest SetSnapshotPropertyRequest = 127;
optional SnapshotInfoRequest SnapshotInfoRequest = 128;
optional RenameSnapshotRequest RenameSnapshotRequest = 129;
optional RefetchNetworkTopologyTreeRequest RefetchNetworkTopologyTreeRequest = 130;
}

message OMResponse {
Expand Down Expand Up @@ -406,6 +408,7 @@ message OMResponse {
optional SnapshotInfoResponse SnapshotInfoResponse = 130;
optional OMLockDetailsProto omLockDetails = 131;
optional RenameSnapshotResponse RenameSnapshotResponse = 132;
optional RefetchNetworkTopologyTreeResponse RefetchNetworkTopologyTreeResponse = 133;
}

enum Status {
Expand Down Expand Up @@ -2129,6 +2132,13 @@ message OMLockDetailsProto {
optional uint64 writeLockNanos = 4;
}

// Request asking the OM to refetch the network topology tree.
// The message declares no fields.
message RefetchNetworkTopologyTreeRequest {
}

// Response to a RefetchNetworkTopologyTreeRequest.
message RefetchNetworkTopologyTreeResponse {
// Boolean result the OM returns for the refetch operation.
optional bool status = 1;
}

/**
The OM service that takes care of Ozone namespace.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1094,11 +1094,17 @@ private void stopSecretManager() {
}
}

/**
 * Asks the secret key client to refetch the secret key and reports the id
 * of the key that is current afterwards.
 *
 * @return id of the current secret key, read after the refetch call.
 */
@Override
public UUID refetchSecretKey() {
  secretKeyClient.refetchSecretKey();
  final UUID currentKeyId = secretKeyClient.getCurrentSecretKey().getId();
  return currentKeyId;
}

@Override
public boolean refetchNetworkTopologyTree() {
return scmTopologyClient.refetchClusterTree(configuration);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think admin privilege should be required to make the request.

}

@VisibleForTesting
public void startSecretManager() {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrintCompactionLogDagRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrintCompactionLogDagResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoBucketRequest;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoBucketResponse;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoVolumeRequest;
Expand Down Expand Up @@ -371,6 +372,10 @@ public OMResponse handleReadRequest(OMRequest request) {
getSnapshotInfo(request.getSnapshotInfoRequest());
responseBuilder.setSnapshotInfoResponse(snapshotInfoResponse);
break;
case RefetchNetworkTopologyTree:
responseBuilder.setRefetchNetworkTopologyTreeResponse(
refetchNetworkTopologyTree());
break;
default:
responseBuilder.setSuccess(false);
responseBuilder.setMessage("Unrecognized Command Type: " + cmdType);
Expand Down Expand Up @@ -1477,6 +1482,15 @@ private SetSafeModeResponse setSafeMode(
.build();
}

/**
 * Triggers the network topology refetch on the OM implementation and wraps
 * the resulting boolean in a protobuf response.
 *
 * @return response whose {@code status} is the value returned by
 * {@code impl.refetchNetworkTopologyTree()}.
 */
private RefetchNetworkTopologyTreeResponse refetchNetworkTopologyTree() {
  // Run the refetch first so the builder only ever sees the final result.
  final boolean refetched = impl.refetchNetworkTopologyTree();
  return RefetchNetworkTopologyTreeResponse.newBuilder()
      .setStatus(refetched)
      .build();
}

private SafeModeAction toSafeModeAction(
OzoneManagerProtocolProtos.SafeMode safeMode) {
switch (safeMode) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package org.apache.hadoop.ozone.admin.om;

import java.io.IOException;
import java.util.concurrent.Callable;
import org.apache.hadoop.hdds.cli.HddsVersionProvider;
import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol;
import picocli.CommandLine;

/**
* Handler of ozone admin om fetch-topology-tree command.
*/
@CommandLine.Command(
name = "fetch-topology-tree",
description = "CLI command for OM to force fetch the latest network " +
"topology tree information from SCM.",
mixinStandardHelpOptions = true,
versionProvider = HddsVersionProvider.class
)
public class FetchNetworkTopologyTreeSubCommand implements Callable<Void> {
@CommandLine.ParentCommand
private OMAdmin parent;

@CommandLine.Option(
names = {"--service-id"},
description = "Ozone Manager Service ID",
required = false
)
private String omServiceId;

@Override
public Void call() throws Exception {
try (OzoneManagerProtocol client = parent.createOmClient(omServiceId)) {
boolean status = false;
try {
status = client.refetchNetworkTopologyTree();
} catch (IOException e) {
System.err.println("Force fetching network topology tree information " +
"has failed: " + e.getMessage());
}
if (status) {
System.out.println(
"Force fetching network topology tree information " +
"is complete.");
}
Comment on lines +54 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this output a bit too verbose.

}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@
DecommissionOMSubcommand.class,
UpdateRangerSubcommand.class,
TransferOmLeaderSubCommand.class,
FetchKeySubCommand.class
FetchKeySubCommand.class,
FetchNetworkTopologyTreeSubCommand.class
})
@MetaInfServices(SubcommandWithParent.class)
public class OMAdmin extends GenericCli implements SubcommandWithParent {
Expand Down