From e7c0cca0d16a4cfc5308f21f6f5f4b569dc56155 Mon Sep 17 00:00:00 2001 From: XiChen <32928346+xichen01@users.noreply.github.com> Date: Sun, 6 Apr 2025 16:21:43 +0800 Subject: [PATCH 1/3] HDDS-12772. Speed up the building of MiniOzoneCluster --- .../apache/hadoop/hdds/HddsConfigKeys.java | 8 +++++ .../apache/hadoop/hdds/scm/ScmConfigKeys.java | 4 +++ .../apache/hadoop/ozone/OzoneConfigKeys.java | 2 ++ .../src/main/resources/ozone-default.xml | 31 +++++++++++++++++++ .../common/statemachine/StateContext.java | 8 +++-- .../server/ratis/XceiverServerRatis.java | 10 ++++++ .../hadoop/hdds/utils/HddsServerUtil.java | 29 +++++++++++++++++ .../apache/hadoop/hdds/scm/ha/RatisUtil.java | 8 +++++ .../hdds/upgrade/TestScmHAFinalization.java | 1 + ...estDatanodeHddsVolumeFailureDetection.java | 2 +- .../recon/TestReconAndAdminContainerCLI.java | 2 +- .../hadoop/ozone/MiniOzoneClusterImpl.java | 19 ++++++++++++ .../hadoop/ozone/UniformDatanodesFactory.java | 7 ++++- 13 files changed, 126 insertions(+), 5 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 370c480315da..c76cafda55c2 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -27,10 +27,18 @@ public final class HddsConfigKeys { "hdds.heartbeat.interval"; public static final String HDDS_HEARTBEAT_INTERVAL_DEFAULT = "30s"; + public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL = + "hdds.initial.heartbeat.interval"; + public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT = + "2s"; public static final String HDDS_RECON_HEARTBEAT_INTERVAL = "hdds.recon.heartbeat.interval"; public static final String HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT = "60s"; + public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL = + "hdds.recon.initial.heartbeat.interval"; + 
public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT = + "2s"; public static final String HDDS_NODE_REPORT_INTERVAL = "hdds.node.report.interval"; public static final String HDDS_NODE_REPORT_INTERVAL_DEFAULT = diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index c2f79a786570..34848a9d5f73 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -635,6 +635,10 @@ public final class ScmConfigKeys { public static final String HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL = "hdds.container.ratis.statemachine.write.wait.interval"; public static final long HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT = 10 * 60 * 1000_000_000L; + + public static final String OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT + = "ozone.scm.ha.raft.server.rpc.first-election.timeout"; + /** * Never constructed. 
 */ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 5dc4e15fc0d6..4db78e644eaf 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -415,6 +415,8 @@ public final class OzoneConfigKeys { ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY; public static final long HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT = ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT; + public static final String HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY = + "hdds.ratis.leader.first.election.minimum.timeout.duration"; public static final String HDDS_DATANODE_PLUGINS_KEY = "hdds.datanode.plugins"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index e2a7b293556f..c6f17586173e 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -347,6 +347,14 @@ Default is 5s. + + hdds.ratis.leader.first.election.minimum.timeout.duration + + OZONE, RATIS, MANAGEMENT + Minimum Ratis timeout for the first election of a leader. + If not configured, falls back to hdds.ratis.leader.election.minimum.timeout.duration. + + hdds.node.report.interval 60000ms @@ -1154,6 +1162,14 @@ if the default value for this config is not used. + + hdds.initial.heartbeat.interval + 2s + OZONE, MANAGEMENT + + Heartbeat interval used during Datanode initialization for Datanode. + + hdds.recon.heartbeat.interval 60s @@ -1162,6 +1178,14 @@ The heartbeat interval from a Datanode to Recon. + + hdds.recon.initial.heartbeat.interval + 2s + OZONE, MANAGEMENT, RECON + + Heartbeat interval used during Datanode initialization for Recon. + + ozone.scm.heartbeat.log.warn.interval.count 10 @@ -3834,6 +3858,13 @@ election. 
Default is 1s. + ozone.scm.ha.raft.server.rpc.first-election.timeout + + SCM, OZONE, HA, RATIS + Ratis timeout for the first election of a leader. + If not configured, falls back to ozone.scm.ha.ratis.leader.election.timeout. + + ozone.scm.ha.ratis.server.leaderelection.pre-vote true SCM, OZONE, HA, RATIS diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index 91cfaa5a21a0..4060902dd22e 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -18,9 +18,11 @@ package org.apache.hadoop.ozone.container.common.statemachine; import static java.lang.Math.min; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.getInitialReconHeartbeatInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getLogWarnInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getReconHeartbeatInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmHeartbeatInterval; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmInitialHeartbeatInterval; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -149,9 +151,9 @@ public class StateContext { * real HB frequency after scm registration. With this method the * initial registration could be significant faster. 
*/ - private final AtomicLong heartbeatFrequency = new AtomicLong(2000); + private final AtomicLong heartbeatFrequency; - private final AtomicLong reconHeartbeatFrequency = new AtomicLong(2000); + private final AtomicLong reconHeartbeatFrequency; private final int maxCommandQueueLimit; @@ -192,6 +194,8 @@ public StateContext(ConfigurationSource conf, fullReportTypeList = new ArrayList<>(); type2Reports = new HashMap<>(); this.threadNamePrefix = threadNamePrefix; + heartbeatFrequency = new AtomicLong(getScmInitialHeartbeatInterval(conf)); + reconHeartbeatFrequency = new AtomicLong(getInitialReconHeartbeatInterval(conf)); initReportTypeCollection(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index ced987c814b5..5dd61302b5b8 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; import static org.apache.ratis.util.Preconditions.assertTrue; import com.google.common.annotations.VisibleForTesting; @@ -387,6 +388,15 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) { leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200; 
RaftServerConfigKeys.Rpc.setTimeoutMax(properties, TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS)); + + long firstElectionDuration = conf.getTimeDuration(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, + -1, TimeUnit.MILLISECONDS); + if (firstElectionDuration > 0) { + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(properties, TimeDuration.valueOf( + firstElectionDuration, TimeUnit.MILLISECONDS)); + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(properties, TimeDuration.valueOf( + firstElectionDuration + 200, TimeUnit.MILLISECONDS)); + } } private void setTimeoutForRetryCache(RaftProperties properties) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java index eeb65cb48ea7..a48a7a33a7da 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java @@ -19,8 +19,12 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdds.HddsUtils.getHostNameFromConfigKeys; import static org.apache.hadoop.hdds.HddsUtils.getPortNumberFromConfigKeys; import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; @@ -266,6 +270,19 @@ public static long getScmHeartbeatInterval(ConfigurationSource conf) { HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); } + + /** + * Heartbeat Interval - Defines the initial heartbeat frequency from a datanode to + * SCM. + * + * @param conf - Ozone Config + * @return - HB interval in milli seconds. + */ + public static long getScmInitialHeartbeatInterval(ConfigurationSource conf) { + return conf.getTimeDuration(HDDS_INITIAL_HEARTBEAT_INTERVAL, + HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } + /** * Heartbeat Interval - Defines the heartbeat frequency from a datanode to * Recon. @@ -278,6 +295,18 @@ public static long getReconHeartbeatInterval(ConfigurationSource conf) { HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); } + /** + * Heartbeat Interval - Defines the initial heartbeat frequency from a datanode to + * Recon. + * + * @param conf - Ozone Config + * @return - HB interval in milli seconds. + */ + public static long getInitialReconHeartbeatInterval(ConfigurationSource conf) { + return conf.getTimeDuration(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, + HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } + /** * Get the Stale Node interval, which is used by SCM to flag a datanode as * stale, if the heartbeat from that node has been missing for this duration. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java index 0d65fff1d24b..a4fbf8626e70 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java @@ -138,6 +138,14 @@ private static void setRaftRpcProperties(final RaftProperties properties, OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS)); + long firstElectionTimeout = ozoneConf.getTimeDuration( + ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, -1, TimeUnit.MILLISECONDS); + if (firstElectionTimeout > 0) { + Rpc.setFirstElectionTimeoutMin( + properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS)); + Rpc.setFirstElectionTimeoutMax( + properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS)); + } } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java index a373075e1155..1d24c559c56a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java @@ -88,6 +88,7 @@ public void init(OzoneConfiguration conf, configurator.setUpgradeFinalizationExecutor(executor); conf.setInt(HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION, HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "5s"); MiniOzoneHAClusterImpl.Builder clusterBuilder = MiniOzoneCluster.newHABuilder(conf); clusterBuilder.setNumOfStorageContainerManagers(NUM_SCMS) diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java index d653facf6edd..fdea9054ce7a 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java @@ -266,7 +266,7 @@ private static MiniOzoneCluster newCluster(boolean schemaV3) DatanodeConfiguration dnConf = ozoneConfig.getObject(DatanodeConfiguration.class); dnConf.setFailedDataVolumesTolerated(1); - dnConf.setDiskCheckMinGap(Duration.ofSeconds(5)); + dnConf.setDiskCheckMinGap(Duration.ofSeconds(2)); ozoneConfig.setFromObject(dnConf); MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(ozoneConfig) .setNumDatanodes(1) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java index e78aa090df97..2ce2a18691e4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java @@ -149,7 +149,7 @@ static void init() throws Exception { reconContainerManager = reconScm.getContainerManager(); LambdaTestUtils.await(60000, 5000, - () -> (reconPipelineManager.getPipelines().size() >= 4)); + () -> (reconPipelineManager.getPipelines().size() >= scmPipelineManager.getPipelines().size())); // Verify that Recon has all the pipelines from SCM. 
scmPipelineManager.getPipelines().forEach(p -> { diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 4a154ef27637..632eb1f73d63 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -138,6 +138,22 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, this.services = services; } + /** + * Creates a new MiniOzoneCluster with Recon. + */ + private MiniOzoneClusterImpl(OzoneConfiguration conf, + SCMConfigurator scmConfigurator, + StorageContainerManager scm, + List hddsDatanodes, + ReconServer reconServer, List services) { + this.conf = conf; + this.scm = scm; + this.hddsDatanodes = hddsDatanodes; + this.reconServer = reconServer; + this.scmConfigurator = scmConfigurator; + this.services = services; + } + /** * Creates a new MiniOzoneCluster without the OzoneManager and * StorageContainerManager. 
This is used by @@ -774,6 +790,9 @@ protected void configureSCM() { "3s"); conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort()); conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort()); + if (conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT) == null) { + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s"); + } } private void configureOM() { diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java index 328e8a9692c7..fed26e641389 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java @@ -19,6 +19,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_CLIENT_ADDRESS_KEY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HTTP_ADDRESS_KEY; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_DU_RESERVED; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; @@ -31,6 +33,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATASTREAM_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_IPC_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SERVER_PORT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.anyHostWithFreePort; import static 
org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; @@ -114,7 +117,9 @@ public OzoneConfiguration apply(OzoneConfiguration conf) throws IOException { if (currentVersion != null) { dnConf.setInt(TESTING_DATANODE_VERSION_CURRENT, currentVersion.toProtoValue()); } - + dnConf.set(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, "1s"); + dnConf.set(HDDS_INITIAL_HEARTBEAT_INTERVAL, "500ms"); + dnConf.set(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, "500ms"); return dnConf; } From 5a999d9d251de0bd4b89623209aad9214dac8998 Mon Sep 17 00:00:00 2001 From: XiChen <32928346+xichen01@users.noreply.github.com> Date: Sun, 6 Apr 2025 23:02:53 +0800 Subject: [PATCH 2/3] remove useless code; extract public method --- .../apache/hadoop/hdds/ratis/RatisHelper.java | 14 ++++++++++++++ .../common/src/main/resources/ozone-default.xml | 2 +- .../server/ratis/XceiverServerRatis.java | 11 ++--------- .../org/apache/hadoop/hdds/scm/ha/RatisUtil.java | 10 ++-------- .../hadoop/ozone/MiniOzoneClusterImpl.java | 16 ---------------- 5 files changed, 19 insertions(+), 34 deletions(-) diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java index bcff6e6071a1..4d89d4fd2cda 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; import java.util.stream.Collectors; @@ -66,6 +67,7 @@ import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; +import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import 
org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.JavaUtils; @@ -645,4 +647,16 @@ public static int calculateAttempts(Duration pollInterval, Duration maxDuration) return (int) (max / interval); } + + public static void setFirstElectionTimeoutDuration( + ConfigurationSource conf, RaftProperties properties, String configKey) { + long firstElectionTimeout = conf.getTimeDuration(configKey, -1, TimeUnit.MILLISECONDS); + if (firstElectionTimeout > 0) { + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin( + properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS)); + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax( + properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS)); + } + } + } diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index c6f17586173e..7e76cf68c72c 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1167,7 +1167,7 @@ 2s OZONE, MANAGEMENT - Heartbeat interval used during Datanode initialization for Datanode. + Heartbeat interval used during Datanode initialization for SCM. 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index 5dd61302b5b8..0170a802c94b 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -388,15 +388,8 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) { leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200; RaftServerConfigKeys.Rpc.setTimeoutMax(properties, TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS)); - - long firstElectionDuration = conf.getTimeDuration(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, - -1, TimeUnit.MILLISECONDS); - if (firstElectionDuration > 0) { - RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(properties, TimeDuration.valueOf( - firstElectionDuration, TimeUnit.MILLISECONDS)); - RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(properties, TimeDuration.valueOf( - firstElectionDuration + 200, TimeUnit.MILLISECONDS)); - } + RatisHelper.setFirstElectionTimeoutDuration( + conf, properties, HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY); } private void setTimeoutForRetryCache(RaftProperties properties) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java index a4fbf8626e70..456d020f9fb8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java @@ -138,14 +138,8 @@ private static void 
setRaftRpcProperties(final RaftProperties properties, OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS)); - long firstElectionTimeout = ozoneConf.getTimeDuration( - ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, -1, TimeUnit.MILLISECONDS); - if (firstElectionTimeout > 0) { - Rpc.setFirstElectionTimeoutMin( - properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS)); - Rpc.setFirstElectionTimeoutMax( - properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS)); - } + RatisHelper.setFirstElectionTimeoutDuration( + ozoneConf, properties, ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT); } /** diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 632eb1f73d63..24456dd4b04e 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -138,22 +138,6 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf, this.services = services; } - /** - * Creates a new MiniOzoneCluster with Recon. - */ - private MiniOzoneClusterImpl(OzoneConfiguration conf, - SCMConfigurator scmConfigurator, - StorageContainerManager scm, - List hddsDatanodes, - ReconServer reconServer, List services) { - this.conf = conf; - this.scm = scm; - this.hddsDatanodes = hddsDatanodes; - this.reconServer = reconServer; - this.scmConfigurator = scmConfigurator; - this.services = services; - } - /** * Creates a new MiniOzoneCluster without the OzoneManager and * StorageContainerManager. 
This is used by From 2201557a158720c87580b115c3cd99589508589c Mon Sep 17 00:00:00 2001 From: XiChen <32928346+xichen01@users.noreply.github.com> Date: Mon, 7 Apr 2025 01:28:25 +0800 Subject: [PATCH 3/3] Replace with setIfUnset --- .../java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 24456dd4b04e..cef2724de2a5 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -774,9 +774,7 @@ protected void configureSCM() { "3s"); conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort()); conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort()); - if (conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT) == null) { - conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s"); - } + conf.setIfUnset(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s"); } private void configureOM() {