From 7af329eaee640a2d03d1c5040ad803af8b8eaca4 Mon Sep 17 00:00:00 2001 From: deveshsingh Date: Thu, 14 Dec 2023 18:54:35 +0530 Subject: [PATCH 1/4] HDDS-9883. Recon - Improve the performance of processing of IncrementalContainerReport requests from DN. --- .../hadoop/hdds/recon/ReconConfigKeys.java | 12 +++++ .../hdds/scm/server/ContainerReportQueue.java | 8 ++++ .../SCMDatanodeHeartbeatDispatcher.java | 13 +++++ .../apache/hadoop/ozone/recon/ReconUtils.java | 29 +++++++++++ .../recon/scm/ReconContainerManager.java | 4 ++ .../recon/scm/ReconContainerReportQueue.java | 48 +++++++++++++++++++ ...econIncrementalContainerReportHandler.java | 33 ++++++++----- .../ReconStorageContainerManagerFacade.java | 17 ++++++- ...econIncrementalContainerReportHandler.java | 15 +++++- 9 files changed, 164 insertions(+), 15 deletions(-) create mode 100644 hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java index 3571d39bc8a2..1631ef33fd8c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java @@ -82,4 +82,16 @@ private ReconConfigKeys() { public static final String OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD = "ozone.recon.task.safemode.wait.threshold"; + + public static final String OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY = + "ozone.recon.scmclient.rpc.timeout"; + + public static final long OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT = + 1 * 60 * 1000L; + + public static final String OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY = + "ozone.recon.scmclient.failover.max.retry"; + + public static final int + OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT = 3; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ContainerReportQueue.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ContainerReportQueue.java index b08b525a86c5..bffddff87b33 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ContainerReportQueue.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/ContainerReportQueue.java @@ -112,6 +112,9 @@ private boolean addIncrementalReport(ContainerReport val) { // 2. Add ICR report or merge to previous ICR List dataList = dataMap.get(uuidString); + if (mergeIcr(val, dataList)) { + return true; + } dataList.add(val); ++capacity; orderingQueue.add(uuidString); @@ -375,4 +378,9 @@ public int getAndResetDropCount(String type) { } return 0; } + + protected boolean mergeIcr(ContainerReport val, + List dataList) { + return false; + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java index aaadbbbcb955..38db618ef539 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMDatanodeHeartbeatDispatcher.java @@ -279,6 +279,7 @@ public LayoutReportFromDatanode(DatanodeDetails datanodeDetails, public interface ContainerReport { DatanodeDetails getDatanodeDetails(); ContainerReportType getType(); + void mergeReport(ContainerReport val); } /** @@ -334,6 +335,9 @@ public String getEventId() { return getDatanodeDetails().toString() + ", {type: " + getType() + ", size: " + getReport().getReportsList().size() + "}"; } + + @Override + public void mergeReport(ContainerReport nextReport) { } } /** @@ -374,6 +378,15 @@ public String getEventId() { return getDatanodeDetails().toString() + ", {type: " + getType() + ", size: " + getReport().getReportList().size() + "}"; } + + @Override + public void mergeReport(ContainerReport nextReport) { + if (nextReport.getType() == ContainerReportType.ICR) { + getReport().getReportList().addAll( + ((ReportFromDatanode) nextReport) + .getReport().getReportList()); + } + } } /** diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java index 0d0c57fbe36f..c5485610738e 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconUtils.java @@ -29,6 +29,9 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; import com.google.common.base.Preconditions; import com.google.inject.Singleton; @@ -36,7 +39,9 @@ import org.apache.hadoop.hdds.HddsUtils; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.scm.ScmUtils; import org.apache.hadoop.hdds.scm.ha.SCMNodeDetails; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher; import org.apache.hadoop.hdds.utils.HddsServerUtil; import org.apache.hadoop.hdfs.web.URLConnectionFactory; import org.apache.hadoop.io.IOUtils; @@ -44,6 +49,9 @@ import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; + +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_CONTAINER_REPORT_QUEUE_SIZE_DEFAULT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_THREAD_POOL_SIZE_DEFAULT; import static org.apache.hadoop.hdds.server.ServerUtils.getDirectoryFromConfig; import static org.apache.hadoop.hdds.server.ServerUtils.getOzoneMetaDirPath; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_DB_DIR; @@ -51,9 +59,11 @@ import static org.jooq.impl.DSL.select; import static org.jooq.impl.DSL.using; +import org.apache.hadoop.ozone.recon.scm.ReconContainerReportQueue; import org.apache.hadoop.security.authentication.client.AuthenticationException; import org.hadoop.ozone.recon.schema.tables.daos.GlobalStatsDao; import org.hadoop.ozone.recon.schema.tables.pojos.GlobalStats; +import org.jetbrains.annotations.NotNull; import org.jooq.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,6 +86,25 @@ public static File getReconScmDbDir(ConfigurationSource conf) { return new ReconUtils().getReconDbDir(conf, OZONE_RECON_SCM_DB_DIR); } + @NotNull + public static List> initContainerReportQueue( + OzoneConfiguration configuration) { + int threadPoolSize = + configuration.getInt(ScmUtils.getContainerReportConfPrefix() + + ".thread.pool.size", + OZONE_SCM_EVENT_THREAD_POOL_SIZE_DEFAULT); + int queueSize = configuration.getInt( + ScmUtils.getContainerReportConfPrefix() + ".queue.size", + OZONE_SCM_EVENT_CONTAINER_REPORT_QUEUE_SIZE_DEFAULT); + List> queues = + new ArrayList<>(); + for (int i = 0; i < threadPoolSize; ++i) { + queues.add(new ReconContainerReportQueue(queueSize)); + } + return queues; + } + /** * Get configured Recon DB directory value based on config. If not present, * fallback to ozone.metadata.dirs diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java index d1d8373a29fd..3dde78a0fd64 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerManager.java @@ -456,4 +456,8 @@ public Map getPipelineToOpenContainer() { return pipelineToOpenContainer; } + @VisibleForTesting + public StorageContainerServiceProvider getScmClient() { + return scmClient; + } } diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java new file mode 100644 index 000000000000..0d8b13d73744 --- /dev/null +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ozone.recon.scm; + +import org.apache.hadoop.hdds.scm.server.ContainerReportQueue; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.ContainerReport; + +import java.util.List; + +/** + * Customized queue to handle FCR and ICR from datanode optimally, + * avoiding duplicate FCR reports. + */ +public class ReconContainerReportQueue extends ContainerReportQueue { + + public ReconContainerReportQueue(int queueSize) { + super(queueSize); + } + + @Override + protected boolean mergeIcr(ContainerReport val, + List dataList) { + if (!dataList.isEmpty()) { + if (SCMDatanodeHeartbeatDispatcher.ContainerReportType.ICR + == dataList.get(dataList.size() - 1).getType()) { + dataList.get(dataList.size() - 1).mergeReport(val); + return true; + } + } + return false; + } +} diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java index 18d995d053aa..1f2b1d5cf249 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconIncrementalContainerReportHandler.java @@ -24,8 +24,8 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto; import org.apache.hadoop.hdds.scm.container.ContainerID; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException; import org.apache.hadoop.hdds.scm.container.IncrementalContainerReportHandler; import org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.node.NodeManager; @@ -69,24 +69,33 @@ public void onMessage(final IncrementalContainerReportFromDatanode report, ReconContainerManager containerManager = (ReconContainerManager) getContainerManager(); + try { + containerManager.checkAndAddNewContainerBatch( + report.getReport().getReportList()); + } catch (Exception ioEx) { + LOG.error("Exception while checking and adding new container.", ioEx); + return; + } boolean success = true; for (ContainerReplicaProto replicaProto : report.getReport().getReportList()) { + ContainerID id = ContainerID.valueOf(replicaProto.getContainerID()); + ContainerInfo container = null; try { - final ContainerID id = ContainerID.valueOf( - replicaProto.getContainerID()); try { - containerManager.checkAndAddNewContainer(id, replicaProto.getState(), - report.getDatanodeDetails()); - } catch (Exception ioEx) { - LOG.error("Exception while checking and adding new container.", ioEx); - return; + container = getContainerManager().getContainer(id); + // Ensure we reuse the same ContainerID instance in containerInfo + id = container.containerID(); + } finally { + if (replicaProto.getState().equals( + ContainerReplicaProto.State.DELETED)) { + getNodeManager().removeContainer(dd, id); + } else { + getNodeManager().addContainer(dd, id); + } } - getNodeManager().addContainer(dd, id); processContainerReplica(dd, replicaProto, publisher); - } catch (ContainerNotFoundException e) { - success = false; - LOG.warn("Container {} not found!", replicaProto.getContainerID()); + success = true; } catch (NodeNotFoundException ex) { success = false; LOG.error("Received ICR from unknown datanode {}.", diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 464ec1a5ee85..037c35bd195f 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineActionHandler; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -99,6 +100,11 @@ import org.apache.hadoop.ozone.recon.tasks.ContainerSizeCountTask; import org.apache.hadoop.ozone.recon.tasks.ReconTaskConfig; import com.google.inject.Inject; + +import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT; +import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY; +import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT; +import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.RECON_SCM_CONFIG_PREFIX; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_EXEC_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_QUEUE_WAIT_THRESHOLD_DEFAULT; @@ -182,6 +188,15 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, .setSCM(this) .build(); this.ozoneConfiguration = getReconScmConfiguration(conf); + long scmClientRPCTimeOut = ozoneConfiguration.getLong( + OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY, + OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT); + int scmClientFailOverMaxRetryCount = ozoneConfiguration.getInt( + OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY, + OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT); + SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class); + scmClientConfig.setRpcTimeOut(scmClientRPCTimeOut); + scmClientConfig.setRetryCount(scmClientFailOverMaxRetryCount); this.scmStorageConfig = new ReconStorageConfig(conf, reconUtils); this.clusterMap = new NetworkTopologyImpl(conf); this.dbStore = DBStoreBuilder @@ -283,7 +298,7 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, ScmUtils.getContainerReportConfPrefix() + ".execute.wait.threshold", OZONE_SCM_EVENT_REPORT_EXEC_WAIT_THRESHOLD_DEFAULT); List> queues - = ScmUtils.initContainerReportQueue(ozoneConfiguration); + = ReconUtils.initContainerReportQueue(ozoneConfiguration); List executors = FixedThreadPoolWithAffinityExecutor.initializeExecutorPool( threadNamePrefix, queues); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconIncrementalContainerReportHandler.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconIncrementalContainerReportHandler.java index cb11d7060d78..f50acc09258f 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconIncrementalContainerReportHandler.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/scm/TestReconIncrementalContainerReportHandler.java @@ -30,7 +30,9 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.UUID; import java.util.concurrent.TimeoutException; @@ -66,18 +68,28 @@ public class TestReconIncrementalContainerReportHandler private HDDSLayoutVersionManager versionManager; @Test - public void testProcessICR() throws IOException, NodeNotFoundException { + public void testProcessICR() + throws IOException, NodeNotFoundException, TimeoutException { ContainerID containerID = ContainerID.valueOf(100L); DatanodeDetails datanodeDetails = randomDatanodeDetails(); IncrementalContainerReportFromDatanode reportMock = mock(IncrementalContainerReportFromDatanode.class); when(reportMock.getDatanodeDetails()).thenReturn(datanodeDetails); + + ContainerWithPipeline containerWithPipeline = getTestContainer( + containerID.getId(), OPEN); + List containerWithPipelineList = new ArrayList<>(); + containerWithPipelineList.add(containerWithPipeline); + ReconContainerManager containerManager = getContainerManager(); IncrementalContainerReportProto containerReport = getIncrementalContainerReportProto(containerID, State.OPEN, datanodeDetails.getUuidString()); when(reportMock.getReport()).thenReturn(containerReport); + when(getContainerManager().getScmClient() + .getExistContainerWithPipelinesInBatch(any( + ArrayList.class))).thenReturn(containerWithPipelineList); final String path = GenericTestUtils.getTempPath(UUID.randomUUID().toString()); @@ -99,7 +111,6 @@ public void testProcessICR() throws IOException, NodeNotFoundException { nodeManager.register(datanodeDetails, null, null); - ReconContainerManager containerManager = getContainerManager(); ReconIncrementalContainerReportHandler reconIcr = new ReconIncrementalContainerReportHandler(nodeManager, containerManager, SCMContext.emptyContext()); From 67ef32b40593bd56e8a6ed6f1b2f9c095397b5d7 Mon Sep 17 00:00:00 2001 From: deveshsingh Date: Fri, 15 Dec 2023 11:37:26 +0530 Subject: [PATCH 2/4] HDDS-9883. Recon - Improve the performance of processing of IncrementalContainerReport requests from DN. --- .../hadoop/hdds/recon/ReconConfigKeys.java | 12 --------- .../src/main/resources/ozone-default.xml | 26 +++++++++++++++++++ .../ozone/recon/ReconServerConfigKeys.java | 17 ++++++++++++ .../ReconStorageContainerManagerFacade.java | 20 +++++++++----- 4 files changed, 57 insertions(+), 18 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java index 1631ef33fd8c..3571d39bc8a2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/recon/ReconConfigKeys.java @@ -82,16 +82,4 @@ private ReconConfigKeys() { public static final String OZONE_RECON_TASK_SAFEMODE_WAIT_THRESHOLD = "ozone.recon.task.safemode.wait.threshold"; - - public static final String OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY = - "ozone.recon.scmclient.rpc.timeout"; - - public static final long OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT = - 1 * 60 * 1000L; - - public static final String OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY = - "ozone.recon.scmclient.failover.max.retry"; - - public static final int - OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT = 3; } diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index d0c8f4e2bd36..94c9d3a6f723 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -3118,6 +3118,32 @@ SCM snapshot. + + ozone.recon.scmclient.rpc.timeout + 1m + OZONE, RECON, SCM + + RpcClient timeout on waiting for the response from SCM when Recon connects to SCM. + + + + ozone.recon.scmclient.max.retry.timeout + 6s + OZONE, RECON, SCM + + Max retry timeout for SCM Client when Recon connects to SCM. This config is used to + dynamically compute the max retry count for SCM Client when failover happens. Check the + SCMClientConfig class getRetryCount method. + + + + ozone.recon.scmclient.failover.max.retry + 3 + OZONE, RECON, SCM + + Max retry count for SCM Client when failover happens. + + ozone.recon.om.socket.timeout 5s diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java index b3c601c4c1fc..ab87bda4412c 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServerConfigKeys.java @@ -168,6 +168,23 @@ public final class ReconServerConfigKeys { public static final String OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT = "1m"; + + public static final String OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY = + "ozone.recon.scmclient.rpc.timeout"; + + public static final String OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT = "1m"; + + public static final String OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_KEY = + "ozone.recon.scmclient.max.retry.timeout"; + + public static final String OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_DEFAULT = + "6s"; + + public static final String OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY = + "ozone.recon.scmclient.failover.max.retry"; + + public static final int + OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT = 3; /** * Private constructor for utility class. */ diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 037c35bd195f..4c264ae6b160 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -101,15 +101,17 @@ import org.apache.hadoop.ozone.recon.tasks.ReconTaskConfig; import com.google.inject.Inject; -import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT; -import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY; -import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT; -import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY; import static org.apache.hadoop.hdds.recon.ReconConfigKeys.RECON_SCM_CONFIG_PREFIX; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_EXEC_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_QUEUE_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.buildRpcServerStartMessage; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_KEY; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT; +import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INITIAL_DELAY_DEFAULT; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_SNAPSHOT_TASK_INTERVAL_DEFAULT; @@ -188,15 +190,21 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, .setSCM(this) .build(); this.ozoneConfiguration = getReconScmConfiguration(conf); - long scmClientRPCTimeOut = ozoneConfiguration.getLong( + long scmClientRPCTimeOut = ozoneConfiguration.getTimeDuration( OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY, - OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT); + OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT, + TimeUnit.MILLISECONDS); + long scmClientMaxRetryTimeOut = ozoneConfiguration.getTimeDuration( + OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_KEY, + OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_DEFAULT, + TimeUnit.MILLISECONDS); int scmClientFailOverMaxRetryCount = ozoneConfiguration.getInt( OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY, OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT); SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class); scmClientConfig.setRpcTimeOut(scmClientRPCTimeOut); scmClientConfig.setRetryCount(scmClientFailOverMaxRetryCount); + scmClientConfig.setMaxRetryTimeout(scmClientMaxRetryTimeOut); this.scmStorageConfig = new ReconStorageConfig(conf, reconUtils); this.clusterMap = new NetworkTopologyImpl(conf); this.dbStore = DBStoreBuilder From db634246aa0800a69a8b51f25220bd8531fe8663 Mon Sep 17 00:00:00 2001 From: deveshsingh Date: Wed, 3 Jan 2024 12:52:25 +0530 Subject: [PATCH 3/4] HDDS-9883. Fixed review comments to set update hdds property value directly. --- .../apache/hadoop/ozone/OzoneConfigKeys.java | 8 ++++++++ .../recon/scm/ReconContainerReportQueue.java | 3 +-- .../ReconStorageContainerManagerFacade.java | 20 +++++++++++-------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index f124e24141f5..21c89cc3c8d4 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -668,6 +668,14 @@ public final class OzoneConfigKeys { public static final String OZONE_SCM_CLOSE_CONTAINER_WAIT_DURATION = "ozone.scm.close.container.wait.duration"; + public static final String HDDS_SCM_CLIENT_RPC_TIME_OUT = + "hdds.scmclient.rpc.timeout"; + public static final String HDDS_SCM_CLIENT_MAX_RETRY_TIMEOUT = + "hdds.scmclient.max.retry.timeout"; + public static final String HDDS_SCM_CLIENT_FAILOVER_MAX_RETRY = + "hdds.scmclient.failover.max.retry"; + + /** * There is no need to instantiate this class. */ diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java index 0d8b13d73744..8d5f92eda4ca 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconContainerReportQueue.java @@ -24,8 +24,7 @@ import java.util.List; /** - * Customized queue to handle FCR and ICR from datanode optimally, - * avoiding duplicate FCR reports. + * Customized queue to handle multiple ICR report together. */ public class ReconContainerReportQueue extends ContainerReportQueue { diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java index 4c264ae6b160..556c6194192f 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java @@ -76,7 +76,6 @@ import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineActionHandler; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; -import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; import org.apache.hadoop.hdds.server.events.EventQueue; @@ -105,6 +104,9 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_EXEC_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_EVENT_REPORT_QUEUE_WAIT_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdds.scm.server.StorageContainerManager.buildRpcServerStartMessage; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_FAILOVER_MAX_RETRY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_MAX_RETRY_TIMEOUT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_SCM_CLIENT_RPC_TIME_OUT; import static org.apache.hadoop.ozone.OzoneConsts.OZONE_URI_DELIMITER; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT; import static org.apache.hadoop.ozone.recon.ReconServerConfigKeys.OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY; @@ -190,21 +192,23 @@ public ReconStorageContainerManagerFacade(OzoneConfiguration conf, .setSCM(this) .build(); this.ozoneConfiguration = getReconScmConfiguration(conf); - long scmClientRPCTimeOut = ozoneConfiguration.getTimeDuration( + long scmClientRPCTimeOut = conf.getTimeDuration( OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_KEY, OZONE_RECON_SCM_CLIENT_RPC_TIME_OUT_DEFAULT, TimeUnit.MILLISECONDS); - long scmClientMaxRetryTimeOut = ozoneConfiguration.getTimeDuration( + long scmClientMaxRetryTimeOut = conf.getTimeDuration( OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_KEY, OZONE_RECON_SCM_CLIENT_MAX_RETRY_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS); - int scmClientFailOverMaxRetryCount = ozoneConfiguration.getInt( + int scmClientFailOverMaxRetryCount = conf.getInt( OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_KEY, OZONE_RECON_SCM_CLIENT_FAILOVER_MAX_RETRY_DEFAULT); - SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class); - scmClientConfig.setRpcTimeOut(scmClientRPCTimeOut); - scmClientConfig.setRetryCount(scmClientFailOverMaxRetryCount); - scmClientConfig.setMaxRetryTimeout(scmClientMaxRetryTimeOut); + + conf.setLong(HDDS_SCM_CLIENT_RPC_TIME_OUT, scmClientRPCTimeOut); + conf.setLong(HDDS_SCM_CLIENT_MAX_RETRY_TIMEOUT, scmClientMaxRetryTimeOut); + conf.setLong(HDDS_SCM_CLIENT_FAILOVER_MAX_RETRY, + scmClientFailOverMaxRetryCount); + this.scmStorageConfig = new ReconStorageConfig(conf, reconUtils); this.clusterMap = new NetworkTopologyImpl(conf); this.dbStore = DBStoreBuilder From e6af3e83e79c7071822cf69bbca30d9290f01e31 Mon Sep 17 00:00:00 2001 From: deveshsingh Date: Thu, 4 Jan 2024 15:21:19 +0530 Subject: [PATCH 4/4] HDDS-9883. Fixed failure of TestOzoneConfigurationFields test case. --- .../apache/hadoop/ozone/TestOzoneConfigurationFields.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java index cb29d61e1a4c..1a437be8131b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestOzoneConfigurationFields.java @@ -141,7 +141,10 @@ private void addPropertiesNotInXml() { ScmConfigKeys.OZONE_SCM_PIPELINE_PLACEMENT_IMPL_KEY, ScmConfigKeys.OZONE_SCM_HA_PREFIX, S3GatewayConfigKeys.OZONE_S3G_FSO_DIRECTORY_CREATION_ENABLED, - HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT + HddsConfigKeys.HDDS_DATANODE_VOLUME_MIN_FREE_SPACE_PERCENT, + OzoneConfigKeys.HDDS_SCM_CLIENT_RPC_TIME_OUT, + OzoneConfigKeys.HDDS_SCM_CLIENT_MAX_RETRY_TIMEOUT, + OzoneConfigKeys.HDDS_SCM_CLIENT_FAILOVER_MAX_RETRY )); } }