diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmTopologyClient.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmTopologyClient.java index 5e33eefde6c5..670a8acb7614 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmTopologyClient.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/client/ScmTopologyClient.java @@ -66,6 +66,12 @@ public NetworkTopology getClusterMap() { "ScmBlockLocationClient must have been initialized already."); } + public boolean refetchClusterTree(ConfigurationSource conf) { + // Fetch the network topology tree from SCM immediately; checkAndRefresh + // returns true on the path that logs an updated topology, else false. + return checkAndRefresh(conf); + } + public void start(ConfigurationSource conf) throws IOException { final InnerNode initialTopology = scmBlockLocationProtocol.getNetworkTopology(); @@ -118,7 +124,7 @@ public static Duration parseRefreshDuration(ConfigurationSource conf) { return Duration.ofMillis(refreshDurationInMs); } - private synchronized void checkAndRefresh(ConfigurationSource conf) { + private synchronized boolean checkAndRefresh(ConfigurationSource conf) { InnerNode current = (InnerNode) cache.get().getNode(ROOT); try { InnerNode newTopology = scmBlockLocationProtocol.getNetworkTopology(); @@ -128,10 +134,12 @@ private synchronized void checkAndRefresh(ConfigurationSource conf) { ScmConfigKeys.OZONE_SCM_NETWORK_TOPOLOGY_SCHEMA_FILE_DEFAULT), newTopology)); LOG.info("Updated network topology fetched from SCM: {}.", newTopology); + return true; } } catch (IOException e) { throw new UncheckedIOException( "Error fetching updated network topology from SCM", e); } + return false; } } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java index b4777c7a016b..c62c1581041f 100644 --- 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java @@ -273,6 +273,7 @@ public static boolean isReadOnly( case SetSafeMode: case PrintCompactionLogDag: case GetSnapshotInfo: + case RefetchNetworkTopologyTree: return true; case CreateVolume: case SetVolumeProperty: diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java index ab3f576d4492..642665445a51 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OzoneManagerProtocol.java @@ -1134,4 +1134,12 @@ void setTimes(OmKeyArgs keyArgs, long mtime, long atime) */ boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException; + + /** + * API for OM to force fetch the network topology tree information from SCM + * without having to rely on ozone.om.network.topology.refresh.duration. + * @return true if OM updated its cached topology tree, false otherwise. 
+ * @throws IOException if the call to OM fails. + */ + boolean refetchNetworkTopologyTree() throws IOException; } diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java index 08fa029833e7..472ad2812de3 100644 --- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java +++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java @@ -178,6 +178,8 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RecoverTrashResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyResponse; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeRequest; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RemoveAclRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RemoveAclResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RenameKeyRequest; @@ -2563,6 +2565,19 @@ public boolean setSafeMode(SafeModeAction action, boolean isChecked) return setSafeModeResponse.getResponse(); } + @Override + public boolean refetchNetworkTopologyTree() throws IOException { + final RefetchNetworkTopologyTreeRequest.Builder requestBuilder = + RefetchNetworkTopologyTreeRequest.newBuilder(); + final OMRequest omRequest = createOMRequest(Type.RefetchNetworkTopologyTree) + .setRefetchNetworkTopologyTreeRequest(requestBuilder) + .build(); + final OMResponse omResponse = submitRequest(omRequest); + final 
RefetchNetworkTopologyTreeResponse resp = + handleError(omResponse).getRefetchNetworkTopologyTreeResponse(); + return resp.getStatus(); + } + private SafeMode toProtoBuf(SafeModeAction action) { switch (action) { case ENTER: diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestGetClusterTreeInformation.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestGetClusterTreeInformation.java index 9becc8b2591c..7250286d6ac1 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestGetClusterTreeInformation.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestGetClusterTreeInformation.java @@ -17,9 +17,20 @@ */ package org.apache.hadoop.ozone; +import org.apache.hadoop.hdds.protocol.DatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.LayoutVersionProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.scm.net.InnerNode; +import org.apache.hadoop.hdds.scm.node.SCMNodeManager; import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB; import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider; +import org.apache.hadoop.ozone.client.ObjectStore; +import org.apache.hadoop.ozone.client.OzoneClient; +import org.apache.hadoop.ozone.container.upgrade.UpgradeUtils; +import org.apache.hadoop.ozone.om.OzoneManager; +import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import org.apache.hadoop.ozone.protocol.commands.RegisteredCommand; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; @@ -31,10 +42,19 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration; import java.io.IOException; +import 
java.util.Arrays; import java.util.concurrent.TimeoutException; import static org.apache.hadoop.hdds.scm.net.NetConstants.ROOT; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.apache.hadoop.hdds.scm.HddsTestUtils.getRandomPipelineReports; +import static org.apache.hadoop.hdds.scm.HddsTestUtils.createNodeReport; +import static org.apache.hadoop.hdds.scm.HddsTestUtils.createMetadataStorageReport; +import static org.apache.hadoop.hdds.scm.HddsTestUtils.createStorageReport; +import static org.apache.hadoop.hdds.protocol.MockDatanodeDetails.randomDatanodeDetails; +import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMRegisteredResponseProto.ErrorCode.success; /** * @@ -50,6 +70,11 @@ public class TestGetClusterTreeInformation { private static MiniOzoneCluster cluster; private static OzoneConfiguration conf; private static StorageContainerManager scm; + private static OzoneManager om; + private static SCMNodeManager nodeManager; + private static OzoneClient client; + private static ObjectStore store; + private static OzoneManagerProtocol omClient; @BeforeAll public static void init() throws IOException, TimeoutException, @@ -62,6 +87,11 @@ public static void init() throws IOException, TimeoutException, .build(); cluster.waitForClusterToBeReady(); scm = cluster.getStorageContainerManager(); + om = cluster.getOzoneManager(); + nodeManager = (SCMNodeManager) scm.getScmNodeManager(); + client = cluster.newClient(); + store = client.getObjectStore(); + omClient = store.getClientProxy().getOzoneManagerClient(); } @AfterAll @@ -84,4 +114,46 @@ public void testGetClusterTreeInformation() throws IOException { InnerNode actualInnerNode = scmBlockLocationClient.getNetworkTopology(); assertEquals(expectedInnerNode, actualInnerNode); } + + @Test + public void 
testForceFetchClusterTreeInformation() throws IOException { + omClient.refetchNetworkTopologyTree(); + + DatanodeDetails datanode1ToAdd = registerDatanode(); + // OM's copy of network topology does not contain the newly registered DN + // information at first. + assertFalse(om.getClusterMap().contains(datanode1ToAdd)); + + omClient.refetchNetworkTopologyTree(); + // The API fetches network topology information from SCM on demand, + // without having to rely on ozone.om.network.topology.refresh.duration. + // Now, OM's copy of network topology should contain the newly added DN. + assertTrue(om.getClusterMap().contains(datanode1ToAdd)); + + DatanodeDetails datanode2ToAdd = registerDatanode(); + assertFalse(om.getClusterMap().contains(datanode2ToAdd)); + + omClient.refetchNetworkTopologyTree(); + assertTrue(om.getClusterMap().contains(datanode2ToAdd)); + } + + private DatanodeDetails registerDatanode() { + DatanodeDetails details = randomDatanodeDetails(); + // The reports below describe this node, so it is the one registered. + StorageReportProto storageReport = + createStorageReport(details.getUuid(), details.getNetworkFullPath(), + Long.MAX_VALUE); + MetadataStorageReportProto metadataStorageReport = + createMetadataStorageReport(details.getNetworkFullPath(), + Long.MAX_VALUE); + + LayoutVersionProto layout = UpgradeUtils.defaultLayoutVersionProto(); + RegisteredCommand cmd = nodeManager.register(details, + createNodeReport(Arrays.asList(storageReport), + Arrays.asList(metadataStorageReport)), getRandomPipelineReports(), + layout); + + assertEquals(success, cmd.getError()); + return cmd.getDatanode(); + } } diff --git a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto index b0d26020c8d2..96e92e695fbf 100644 --- a/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto +++ b/hadoop-ozone/interface-client/src/main/proto/OmClientProtocol.proto @@ -147,6 +147,7 @@ enum Type { ListStatusLight = 129; GetSnapshotInfo = 130; 
RenameSnapshot = 131; + RefetchNetworkTopologyTree = 132; } enum SafeMode { @@ -283,6 +284,7 @@ message OMRequest { optional SetSnapshotPropertyRequest SetSnapshotPropertyRequest = 127; optional SnapshotInfoRequest SnapshotInfoRequest = 128; optional RenameSnapshotRequest RenameSnapshotRequest = 129; + optional RefetchNetworkTopologyTreeRequest RefetchNetworkTopologyTreeRequest = 130; } message OMResponse { @@ -406,6 +408,7 @@ message OMResponse { optional SnapshotInfoResponse SnapshotInfoResponse = 130; optional OMLockDetailsProto omLockDetails = 131; optional RenameSnapshotResponse RenameSnapshotResponse = 132; + optional RefetchNetworkTopologyTreeResponse RefetchNetworkTopologyTreeResponse = 133; } enum Status { @@ -2129,6 +2132,13 @@ message OMLockDetailsProto { optional uint64 writeLockNanos = 4; } +message RefetchNetworkTopologyTreeRequest { +} + +message RefetchNetworkTopologyTreeResponse { + optional bool status = 1; +} + /** The OM service that takes care of Ozone namespace. */ diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java index 680ebe063156..fead0f6c72b0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java @@ -1094,11 +1094,17 @@ private void stopSecretManager() { } } + @Override public UUID refetchSecretKey() { secretKeyClient.refetchSecretKey(); return secretKeyClient.getCurrentSecretKey().getId(); } + @Override + public boolean refetchNetworkTopologyTree() { + return scmTopologyClient.refetchClusterTree(configuration); + } + @VisibleForTesting public void startSecretManager() { try { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java index 5acb9f365107..90c288757065 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerRequestHandler.java @@ -95,6 +95,7 @@ import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrintCompactionLogDagRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrintCompactionLogDagResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchSecretKeyResponse; +import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.RefetchNetworkTopologyTreeResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoBucketRequest; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoBucketResponse; import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.InfoVolumeRequest; @@ -371,6 +372,10 @@ public OMResponse handleReadRequest(OMRequest request) { getSnapshotInfo(request.getSnapshotInfoRequest()); responseBuilder.setSnapshotInfoResponse(snapshotInfoResponse); break; + case RefetchNetworkTopologyTree: + responseBuilder.setRefetchNetworkTopologyTreeResponse( + refetchNetworkTopologyTree()); + break; default: responseBuilder.setSuccess(false); responseBuilder.setMessage("Unrecognized Command Type: " + cmdType); @@ -1477,6 +1482,15 @@ private SetSafeModeResponse setSafeMode( .build(); } + /** + * Calls impl.refetchNetworkTopologyTree() and wraps the flag in a response. + */ + private RefetchNetworkTopologyTreeResponse refetchNetworkTopologyTree() { + return RefetchNetworkTopologyTreeResponse.newBuilder() + .setStatus(impl.refetchNetworkTopologyTree()) + .build(); + } + private SafeModeAction toSafeModeAction( OzoneManagerProtocolProtos.SafeMode safeMode) { switch 
(safeMode) { diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FetchNetworkTopologyTreeSubCommand.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FetchNetworkTopologyTreeSubCommand.java new file mode 100644 index 000000000000..a551ec8e67f4 --- /dev/null +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/FetchNetworkTopologyTreeSubCommand.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS,WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.ozone.admin.om; + +import java.io.IOException; +import java.util.concurrent.Callable; +import org.apache.hadoop.hdds.cli.HddsVersionProvider; +import org.apache.hadoop.ozone.om.protocol.OzoneManagerProtocol; +import picocli.CommandLine; + +/** + * Handler of ozone admin om fetch-topology-tree command. + */ +@CommandLine.Command( + name = "fetch-topology-tree", + description = "CLI command for OM to force fetch the latest network " + + "topology tree information from SCM.", + mixinStandardHelpOptions = true, + versionProvider = HddsVersionProvider.class +) +public class FetchNetworkTopologyTreeSubCommand implements Callable<Void> { + @CommandLine.ParentCommand + private OMAdmin parent; + + @CommandLine.Option( + names = {"--service-id"}, + description = "Ozone Manager Service ID", + required = false + ) + private String omServiceId; + + @Override + public Void call() throws Exception { + try (OzoneManagerProtocol client = parent.createOmClient(omServiceId)) { + boolean status = false; + try { + status = client.refetchNetworkTopologyTree(); + } catch (IOException e) { + System.err.println("Force fetching network topology tree information " + + "has failed: " + e.getMessage()); + } + if (status) { + System.out.println( + "Force fetching network topology tree information " + + "is complete."); + } + } + return null; + } +} diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java index ce7d4ed7a7ca..c4ce19af65a4 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java +++ 
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/admin/om/OMAdmin.java @@ -60,7 +60,8 @@ DecommissionOMSubcommand.class, UpdateRangerSubcommand.class, TransferOmLeaderSubCommand.class, - FetchKeySubCommand.class + FetchKeySubCommand.class, + FetchNetworkTopologyTreeSubCommand.class }) @MetaInfServices(SubcommandWithParent.class) public class OMAdmin extends GenericCli implements SubcommandWithParent {