From e7c0cca0d16a4cfc5308f21f6f5f4b569dc56155 Mon Sep 17 00:00:00 2001
From: XiChen <32928346+xichen01@users.noreply.github.com>
Date: Sun, 6 Apr 2025 16:21:43 +0800
Subject: [PATCH 1/3] HDDS-12772. Speed up the building of MiniOzoneCluster
---
.../apache/hadoop/hdds/HddsConfigKeys.java | 8 +++++
.../apache/hadoop/hdds/scm/ScmConfigKeys.java | 4 +++
.../apache/hadoop/ozone/OzoneConfigKeys.java | 2 ++
.../src/main/resources/ozone-default.xml | 31 +++++++++++++++++++
.../common/statemachine/StateContext.java | 8 +++--
.../server/ratis/XceiverServerRatis.java | 10 ++++++
.../hadoop/hdds/utils/HddsServerUtil.java | 29 +++++++++++++++++
.../apache/hadoop/hdds/scm/ha/RatisUtil.java | 8 +++++
.../hdds/upgrade/TestScmHAFinalization.java | 1 +
...estDatanodeHddsVolumeFailureDetection.java | 2 +-
.../recon/TestReconAndAdminContainerCLI.java | 2 +-
.../hadoop/ozone/MiniOzoneClusterImpl.java | 19 ++++++++++++
.../hadoop/ozone/UniformDatanodesFactory.java | 7 ++++-
13 files changed, 126 insertions(+), 5 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
index 370c480315da..c76cafda55c2 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java
@@ -27,10 +27,18 @@ public final class HddsConfigKeys {
"hdds.heartbeat.interval";
public static final String HDDS_HEARTBEAT_INTERVAL_DEFAULT =
"30s";
+ public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL =
+ "hdds.initial.heartbeat.interval";
+ public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT =
+ "2s";
public static final String HDDS_RECON_HEARTBEAT_INTERVAL =
"hdds.recon.heartbeat.interval";
public static final String HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT =
"60s";
+ public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL =
+ "hdds.recon.initial.heartbeat.interval";
+ public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT =
+ "2s";
public static final String HDDS_NODE_REPORT_INTERVAL =
"hdds.node.report.interval";
public static final String HDDS_NODE_REPORT_INTERVAL_DEFAULT =
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index c2f79a786570..34848a9d5f73 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -635,6 +635,10 @@ public final class ScmConfigKeys {
public static final String HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL
= "hdds.container.ratis.statemachine.write.wait.interval";
public static final long HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT = 10 * 60 * 1000_000_000L;
+
+ public static final String OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT
+ = "ozone.scm.ha.raft.server.rpc.first-election.timeout";
+
/**
* Never constructed.
*/
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
index 5dc4e15fc0d6..4db78e644eaf 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java
@@ -415,6 +415,8 @@ public final class OzoneConfigKeys {
ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY;
public static final long HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT =
ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT;
+ public static final String HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY =
+ "hdds.ratis.leader.first.election.minimum.timeout.duration";
public static final String HDDS_DATANODE_PLUGINS_KEY =
"hdds.datanode.plugins";
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index e2a7b293556f..c6f17586173e 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -347,6 +347,14 @@
Default is 5s.
+
+ hdds.ratis.leader.first.election.minimum.timeout.duration
+
+ OZONE, RATIS, MANAGEMENT
+ Ratis minimum timeout for the first election of a leader.
+ If not configured, falls back to hdds.ratis.leader.election.minimum.timeout.duration.
+
+
hdds.node.report.interval
60000ms
@@ -1154,6 +1162,14 @@
if the default value for this config is not used.
+
+ hdds.initial.heartbeat.interval
+ 2s
+ OZONE, MANAGEMENT
+
+ Heartbeat interval used during Datanode initialization for Datanode.
+
+
hdds.recon.heartbeat.interval
60s
@@ -1162,6 +1178,14 @@
The heartbeat interval from a Datanode to Recon.
+
+ hdds.recon.initial.heartbeat.interval
+ 2s
+ OZONE, MANAGEMENT, RECON
+
+ Heartbeat interval used during Datanode initialization for Recon.
+
+
ozone.scm.heartbeat.log.warn.interval.count
10
@@ -3834,6 +3858,13 @@
election. Default is 1s.
+ ozone.scm.ha.raft.server.rpc.first-election.timeout
+
+ SCM, OZONE, HA, RATIS
+ Ratis timeout for the first election of a leader.
+ If not configured, falls back to ozone.scm.ha.ratis.leader.election.timeout.
+
+
ozone.scm.ha.ratis.server.leaderelection.pre-vote
true
SCM, OZONE, HA, RATIS
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
index 91cfaa5a21a0..4060902dd22e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
@@ -18,9 +18,11 @@
package org.apache.hadoop.ozone.container.common.statemachine;
import static java.lang.Math.min;
+import static org.apache.hadoop.hdds.utils.HddsServerUtil.getInitialReconHeartbeatInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getLogWarnInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getReconHeartbeatInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmHeartbeatInterval;
+import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmInitialHeartbeatInterval;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
@@ -149,9 +151,9 @@ public class StateContext {
* real HB frequency after scm registration. With this method the
* initial registration could be significant faster.
*/
- private final AtomicLong heartbeatFrequency = new AtomicLong(2000);
+ private final AtomicLong heartbeatFrequency;
- private final AtomicLong reconHeartbeatFrequency = new AtomicLong(2000);
+ private final AtomicLong reconHeartbeatFrequency;
private final int maxCommandQueueLimit;
@@ -192,6 +194,8 @@ public StateContext(ConfigurationSource conf,
fullReportTypeList = new ArrayList<>();
type2Reports = new HashMap<>();
this.threadNamePrefix = threadNamePrefix;
+ heartbeatFrequency = new AtomicLong(getScmInitialHeartbeatInterval(conf));
+ reconHeartbeatFrequency = new AtomicLong(getInitialReconHeartbeatInterval(conf));
initReportTypeCollection();
}
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index ced987c814b5..5dd61302b5b8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -24,6 +24,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_KEY;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY;
import static org.apache.ratis.util.Preconditions.assertTrue;
import com.google.common.annotations.VisibleForTesting;
@@ -387,6 +388,15 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) {
leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200;
RaftServerConfigKeys.Rpc.setTimeoutMax(properties,
TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS));
+
+ long firstElectionDuration = conf.getTimeDuration(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
+ -1, TimeUnit.MILLISECONDS);
+ if (firstElectionDuration > 0) {
+ RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(properties, TimeDuration.valueOf(
+ firstElectionDuration, TimeUnit.MILLISECONDS));
+ RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(properties, TimeDuration.valueOf(
+ firstElectionDuration + 200, TimeUnit.MILLISECONDS));
+ }
}
private void setTimeoutForRetryCache(RaftProperties properties) {
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
index eeb65cb48ea7..a48a7a33a7da 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java
@@ -19,8 +19,12 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL_DEFAULT;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsUtils.getHostNameFromConfigKeys;
import static org.apache.hadoop.hdds.HddsUtils.getPortNumberFromConfigKeys;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY;
@@ -266,6 +270,19 @@ public static long getScmHeartbeatInterval(ConfigurationSource conf) {
HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
}
+
+ /**
+ * Initial Heartbeat Interval - Defines the initial heartbeat frequency from a
+ * datanode to SCM.
+ *
+ * @param conf - Ozone Config
+ * @return - initial HB interval in milliseconds.
+ */
+ public static long getScmInitialHeartbeatInterval(ConfigurationSource conf) {
+ return conf.getTimeDuration(HDDS_INITIAL_HEARTBEAT_INTERVAL,
+ HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
+ }
+
/**
* Heartbeat Interval - Defines the heartbeat frequency from a datanode to
* Recon.
@@ -278,6 +295,18 @@ public static long getReconHeartbeatInterval(ConfigurationSource conf) {
HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
}
+ /**
+ * Initial Heartbeat Interval - Defines the initial heartbeat frequency from a
+ * datanode to Recon.
+ *
+ * @param conf - Ozone Config
+ * @return - initial HB interval in milliseconds.
+ */
+ public static long getInitialReconHeartbeatInterval(ConfigurationSource conf) {
+ return conf.getTimeDuration(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL,
+ HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
+ }
+
/**
* Get the Stale Node interval, which is used by SCM to flag a datanode as
* stale, if the heartbeat from that node has been missing for this duration.
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
index 0d65fff1d24b..a4fbf8626e70 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
@@ -138,6 +138,14 @@ private static void setRaftRpcProperties(final RaftProperties properties,
OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT,
TimeUnit.MILLISECONDS),
TimeUnit.MILLISECONDS));
+ long firstElectionTimeout = ozoneConf.getTimeDuration(
+ ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, -1, TimeUnit.MILLISECONDS);
+ if (firstElectionTimeout > 0) {
+ Rpc.setFirstElectionTimeoutMin(
+ properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS));
+ Rpc.setFirstElectionTimeoutMax(
+ properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS));
+ }
}
/**
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
index a373075e1155..1d24c559c56a 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java
@@ -88,6 +88,7 @@ public void init(OzoneConfiguration conf,
configurator.setUpgradeFinalizationExecutor(executor);
conf.setInt(HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION, HDDSLayoutFeature.INITIAL_VERSION.layoutVersion());
+ conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "5s");
MiniOzoneHAClusterImpl.Builder clusterBuilder = MiniOzoneCluster.newHABuilder(conf);
clusterBuilder.setNumOfStorageContainerManagers(NUM_SCMS)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
index d653facf6edd..fdea9054ce7a 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java
@@ -266,7 +266,7 @@ private static MiniOzoneCluster newCluster(boolean schemaV3)
DatanodeConfiguration dnConf =
ozoneConfig.getObject(DatanodeConfiguration.class);
dnConf.setFailedDataVolumesTolerated(1);
- dnConf.setDiskCheckMinGap(Duration.ofSeconds(5));
+ dnConf.setDiskCheckMinGap(Duration.ofSeconds(2));
ozoneConfig.setFromObject(dnConf);
MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(ozoneConfig)
.setNumDatanodes(1)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java
index e78aa090df97..2ce2a18691e4 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java
@@ -149,7 +149,7 @@ static void init() throws Exception {
reconContainerManager = reconScm.getContainerManager();
LambdaTestUtils.await(60000, 5000,
- () -> (reconPipelineManager.getPipelines().size() >= 4));
+ () -> (reconPipelineManager.getPipelines().size() >= scmPipelineManager.getPipelines().size()));
// Verify that Recon has all the pipelines from SCM.
scmPipelineManager.getPipelines().forEach(p -> {
diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index 4a154ef27637..632eb1f73d63 100644
--- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -138,6 +138,22 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf,
this.services = services;
}
+ /**
+ * Creates a new MiniOzoneCluster with Recon.
+ */
+ private MiniOzoneClusterImpl(OzoneConfiguration conf,
+ SCMConfigurator scmConfigurator,
+ StorageContainerManager scm,
+ List hddsDatanodes,
+ ReconServer reconServer, List services) {
+ this.conf = conf;
+ this.scm = scm;
+ this.hddsDatanodes = hddsDatanodes;
+ this.reconServer = reconServer;
+ this.scmConfigurator = scmConfigurator;
+ this.services = services;
+ }
+
/**
* Creates a new MiniOzoneCluster without the OzoneManager and
* StorageContainerManager. This is used by
@@ -774,6 +790,9 @@ protected void configureSCM() {
"3s");
conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort());
conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort());
+ if (conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT) == null) {
+ conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s");
+ }
}
private void configureOM() {
diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java
index 328e8a9692c7..fed26e641389 100644
--- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java
+++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java
@@ -19,6 +19,8 @@
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_CLIENT_ADDRESS_KEY;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HTTP_ADDRESS_KEY;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL;
+import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_DU_RESERVED;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY;
@@ -31,6 +33,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATASTREAM_PORT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_IPC_PORT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SERVER_PORT;
+import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY;
import static org.apache.ozone.test.GenericTestUtils.PortAllocator.anyHostWithFreePort;
import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort;
@@ -114,7 +117,9 @@ public OzoneConfiguration apply(OzoneConfiguration conf) throws IOException {
if (currentVersion != null) {
dnConf.setInt(TESTING_DATANODE_VERSION_CURRENT, currentVersion.toProtoValue());
}
-
+ dnConf.set(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, "1s");
+ dnConf.set(HDDS_INITIAL_HEARTBEAT_INTERVAL, "500ms");
+ dnConf.set(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, "500ms");
return dnConf;
}
From 5a999d9d251de0bd4b89623209aad9214dac8998 Mon Sep 17 00:00:00 2001
From: XiChen <32928346+xichen01@users.noreply.github.com>
Date: Sun, 6 Apr 2025 23:02:53 +0800
Subject: [PATCH 2/3] remove useless code; extract public method
---
.../apache/hadoop/hdds/ratis/RatisHelper.java | 14 ++++++++++++++
.../common/src/main/resources/ozone-default.xml | 2 +-
.../server/ratis/XceiverServerRatis.java | 11 ++---------
.../org/apache/hadoop/hdds/scm/ha/RatisUtil.java | 10 ++--------
.../hadoop/ozone/MiniOzoneClusterImpl.java | 16 ----------------
5 files changed, 19 insertions(+), 34 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
index bcff6e6071a1..4d89d4fd2cda 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java
@@ -30,6 +30,7 @@
import java.util.List;
import java.util.Map;
import java.util.UUID;
+import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
@@ -66,6 +67,7 @@
import org.apache.ratis.retry.RetryPolicy;
import org.apache.ratis.rpc.RpcType;
import org.apache.ratis.rpc.SupportedRpcType;
+import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf;
import org.apache.ratis.util.JavaUtils;
@@ -645,4 +647,16 @@ public static int calculateAttempts(Duration pollInterval, Duration maxDuration)
return (int) (max / interval);
}
+
+ public static void setFirstElectionTimeoutDuration(
+ ConfigurationSource conf, RaftProperties properties, String configKey) {
+ long firstElectionTimeout = conf.getTimeDuration(configKey, -1, TimeUnit.MILLISECONDS);
+ if (firstElectionTimeout > 0) {
+ RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(
+ properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS));
+ RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(
+ properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS));
+ }
+ }
+
}
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index c6f17586173e..7e76cf68c72c 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -1167,7 +1167,7 @@
2s
OZONE, MANAGEMENT
- Heartbeat interval used during Datanode initialization for Datanode.
+ Heartbeat interval used during Datanode initialization for SCM.
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index 5dd61302b5b8..0170a802c94b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -388,15 +388,8 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) {
leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200;
RaftServerConfigKeys.Rpc.setTimeoutMax(properties,
TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS));
-
- long firstElectionDuration = conf.getTimeDuration(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY,
- -1, TimeUnit.MILLISECONDS);
- if (firstElectionDuration > 0) {
- RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(properties, TimeDuration.valueOf(
- firstElectionDuration, TimeUnit.MILLISECONDS));
- RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(properties, TimeDuration.valueOf(
- firstElectionDuration + 200, TimeUnit.MILLISECONDS));
- }
+ RatisHelper.setFirstElectionTimeoutDuration(
+ conf, properties, HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY);
}
private void setTimeoutForRetryCache(RaftProperties properties) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
index a4fbf8626e70..456d020f9fb8 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java
@@ -138,14 +138,8 @@ private static void setRaftRpcProperties(final RaftProperties properties,
OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT,
TimeUnit.MILLISECONDS),
TimeUnit.MILLISECONDS));
- long firstElectionTimeout = ozoneConf.getTimeDuration(
- ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, -1, TimeUnit.MILLISECONDS);
- if (firstElectionTimeout > 0) {
- Rpc.setFirstElectionTimeoutMin(
- properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS));
- Rpc.setFirstElectionTimeoutMax(
- properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS));
- }
+ RatisHelper.setFirstElectionTimeoutDuration(
+ ozoneConf, properties, ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT);
}
/**
diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index 632eb1f73d63..24456dd4b04e 100644
--- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -138,22 +138,6 @@ private MiniOzoneClusterImpl(OzoneConfiguration conf,
this.services = services;
}
- /**
- * Creates a new MiniOzoneCluster with Recon.
- */
- private MiniOzoneClusterImpl(OzoneConfiguration conf,
- SCMConfigurator scmConfigurator,
- StorageContainerManager scm,
- List hddsDatanodes,
- ReconServer reconServer, List services) {
- this.conf = conf;
- this.scm = scm;
- this.hddsDatanodes = hddsDatanodes;
- this.reconServer = reconServer;
- this.scmConfigurator = scmConfigurator;
- this.services = services;
- }
-
/**
* Creates a new MiniOzoneCluster without the OzoneManager and
* StorageContainerManager. This is used by
From 2201557a158720c87580b115c3cd99589508589c Mon Sep 17 00:00:00 2001
From: XiChen <32928346+xichen01@users.noreply.github.com>
Date: Mon, 7 Apr 2025 01:28:25 +0800
Subject: [PATCH 3/3] Replace with setIfUnset
---
.../java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index 24456dd4b04e..cef2724de2a5 100644
--- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -774,9 +774,7 @@ protected void configureSCM() {
"3s");
conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort());
conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort());
- if (conf.get(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT) == null) {
- conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s");
- }
+ conf.setIfUnset(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s");
}
private void configureOM() {