Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,18 @@ public final class HddsConfigKeys {
"hdds.heartbeat.interval";
public static final String HDDS_HEARTBEAT_INTERVAL_DEFAULT =
"30s";
public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL =
"hdds.initial.heartbeat.interval";
public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT =
"2s";
public static final String HDDS_RECON_HEARTBEAT_INTERVAL =
"hdds.recon.heartbeat.interval";
public static final String HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT =
"60s";
public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL =
"hdds.recon.initial.heartbeat.interval";
public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT =
"2s";
public static final String HDDS_NODE_REPORT_INTERVAL =
"hdds.node.report.interval";
public static final String HDDS_NODE_REPORT_INTERVAL_DEFAULT =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.BooleanSupplier;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -66,6 +67,7 @@
import org.apache.ratis.retry.RetryPolicy;
import org.apache.ratis.rpc.RpcType;
import org.apache.ratis.rpc.SupportedRpcType;
import org.apache.ratis.server.RaftServerConfigKeys;
import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf;
import org.apache.ratis.util.JavaUtils;
Expand Down Expand Up @@ -645,4 +647,16 @@ public static int calculateAttempts(Duration pollInterval, Duration maxDuration)
return (int) (max / interval);
}


/**
 * Applies a dedicated first-election timeout to the given Raft properties.
 *
 * The duration stored under {@code configKey} is read in milliseconds. When
 * it resolves to a positive value, it becomes the first-election minimum
 * timeout and the maximum is set to that value plus 200 ms. When the lookup
 * yields the fallback of -1, or any other non-positive value, the properties
 * are left untouched.
 *
 * @param conf - configuration source to read the duration from
 * @param properties - Raft properties that receive the min/max timeouts
 * @param configKey - name of the configuration key holding the duration
 */
public static void setFirstElectionTimeoutDuration(
    ConfigurationSource conf, RaftProperties properties, String configKey) {
  final long minTimeoutMs = conf.getTimeDuration(configKey, -1, TimeUnit.MILLISECONDS);
  if (minTimeoutMs <= 0) {
    // No usable value: leave the first-election settings untouched.
    return;
  }

  final TimeDuration lowerBound = TimeDuration.valueOf(minTimeoutMs, TimeUnit.MILLISECONDS);
  final TimeDuration upperBound = TimeDuration.valueOf(minTimeoutMs + 200, TimeUnit.MILLISECONDS);
  RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin(properties, lowerBound);
  RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax(properties, upperBound);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,10 @@ public final class ScmConfigKeys {
public static final String HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL
= "hdds.container.ratis.statemachine.write.wait.interval";
public static final long HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT = 10 * 60 * 1000_000_000L;

public static final String OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT
= "ozone.scm.ha.raft.server.rpc.first-election.timeout";

/**
* Never constructed.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ public final class OzoneConfigKeys {
ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY;
public static final long HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT =
ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT;
public static final String HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY =
"hdds.ratis.leader.first.election.minimum.timeout.duration";

public static final String HDDS_DATANODE_PLUGINS_KEY =
"hdds.datanode.plugins";
Expand Down
31 changes: 31 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,14 @@
Default is 5s.
</description>
</property>
<property>
<name>hdds.ratis.leader.first.election.minimum.timeout.duration</name>
Copy link
Contributor

@adoroszlai adoroszlai Apr 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hdds.ratis.leader.first-election.minimum.timeout.duration

to be more like the one for SCM: ozone.scm.ha.ratis.leader.first-election.timeout

<value/>
<tag>OZONE, RATIS, MANAGEMENT</tag>
<description>Ratis minimum timeout for the first election of a leader.
If not configured, falls back to hdds.ratis.leader.election.minimum.timeout.duration.
</description>
</property>
<property>
<name>hdds.node.report.interval</name>
<value>60000ms</value>
Expand Down Expand Up @@ -1154,6 +1162,14 @@
if the default value for this config is not used.
</description>
</property>
<property>
<name>hdds.initial.heartbeat.interval</name>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest hdds.heartbeat.interval.initial for common prefix.

<value>2s</value>
<tag>OZONE, MANAGEMENT</tag>
<description>
Heartbeat interval used during Datanode initialization for SCM.
</description>
</property>
<property>
<name>hdds.recon.heartbeat.interval</name>
<value>60s</value>
Expand All @@ -1162,6 +1178,14 @@
The heartbeat interval from a Datanode to Recon.
</description>
</property>
<property>
<name>hdds.recon.initial.heartbeat.interval</name>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hdds.recon.heartbeat.interval.initial

(Ideally these keys for Recon should have been hdds.heartbeat.interval.recon and ...recon.initial, but hdds.recon.heartbeat.interval already exists since 1.3.0, so we should rename them with deprecation in separate task.)

<!-- Matches HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT. -->
<value>2s</value>
<tag>OZONE, MANAGEMENT, RECON</tag>
<description>
Heartbeat interval used during Datanode initialization for Recon.
</description>
</property>
<property>
<name>ozone.scm.heartbeat.log.warn.interval.count</name>
<value>10</value>
Expand Down Expand Up @@ -3834,6 +3858,13 @@
election. Default is 1s.</description>
</property>
<property>
<name>ozone.scm.ha.raft.server.rpc.first-election.timeout</name>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ozone.scm.ha.ratis.leader.first-election.timeout is more in line with existing keys like ozone.scm.ha.ratis.leader.election.timeout.

<value/>
<tag>SCM, OZONE, HA, RATIS</tag>
<description>Ratis timeout for the first election of a leader.
If not configured, falls back to ozone.scm.ha.ratis.leader.election.timeout.
</description>
</property><property>
<name>ozone.scm.ha.ratis.server.leaderelection.pre-vote</name>
<value>true</value>
<tag>SCM, OZONE, HA, RATIS</tag>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
package org.apache.hadoop.ozone.container.common.statemachine;

import static java.lang.Math.min;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getInitialReconHeartbeatInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getLogWarnInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getReconHeartbeatInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmHeartbeatInterval;
import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmInitialHeartbeatInterval;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -149,9 +151,9 @@ public class StateContext {
* real HB frequency after scm registration. With this method the
* initial registration could be significantly faster.
*/
private final AtomicLong heartbeatFrequency = new AtomicLong(2000);
private final AtomicLong heartbeatFrequency;

private final AtomicLong reconHeartbeatFrequency = new AtomicLong(2000);
private final AtomicLong reconHeartbeatFrequency;

private final int maxCommandQueueLimit;

Expand Down Expand Up @@ -192,6 +194,8 @@ public StateContext(ConfigurationSource conf,
fullReportTypeList = new ArrayList<>();
type2Reports = new HashMap<>();
this.threadNamePrefix = threadNamePrefix;
heartbeatFrequency = new AtomicLong(getScmInitialHeartbeatInterval(conf));
reconHeartbeatFrequency = new AtomicLong(getInitialReconHeartbeatInterval(conf));
initReportTypeCollection();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_KEY;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY;
import static org.apache.ratis.util.Preconditions.assertTrue;

import com.google.common.annotations.VisibleForTesting;
Expand Down Expand Up @@ -387,6 +388,8 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) {
leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200;
RaftServerConfigKeys.Rpc.setTimeoutMax(properties,
TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS));
RatisHelper.setFirstElectionTimeoutDuration(
conf, properties, HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY);
}

private void setTimeoutForRetryCache(RaftProperties properties) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@

import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdds.HddsUtils.getHostNameFromConfigKeys;
import static org.apache.hadoop.hdds.HddsUtils.getPortNumberFromConfigKeys;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY;
Expand Down Expand Up @@ -266,6 +270,19 @@ public static long getScmHeartbeatInterval(ConfigurationSource conf) {
HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
}


/**
 * Initial Heartbeat Interval - Reads the heartbeat interval configured under
 * {@code HDDS_INITIAL_HEARTBEAT_INTERVAL}, using
 * {@code HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT} when the key is not set.
 *
 * @param conf - Ozone Config
 * @return - initial HB interval in milliseconds.
 */
public static long getScmInitialHeartbeatInterval(ConfigurationSource conf) {
  final long initialIntervalMs = conf.getTimeDuration(
      HDDS_INITIAL_HEARTBEAT_INTERVAL,
      HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT,
      TimeUnit.MILLISECONDS);
  return initialIntervalMs;
}

/**
* Heartbeat Interval - Defines the heartbeat frequency from a datanode to
* Recon.
Expand All @@ -278,6 +295,18 @@ public static long getReconHeartbeatInterval(ConfigurationSource conf) {
HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS);
}

/**
 * Initial Recon Heartbeat Interval - Reads the heartbeat interval configured
 * under {@code HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL}, using
 * {@code HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT} when the key is not
 * set.
 *
 * @param conf - Ozone Config
 * @return - initial Recon HB interval in milliseconds.
 */
public static long getInitialReconHeartbeatInterval(ConfigurationSource conf) {
  final long initialIntervalMs = conf.getTimeDuration(
      HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL,
      HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT,
      TimeUnit.MILLISECONDS);
  return initialIntervalMs;
}

/**
* Get the Stale Node interval, which is used by SCM to flag a datanode as
* stale, if the heartbeat from that node has been missing for this duration.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ private static void setRaftRpcProperties(final RaftProperties properties,
OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT,
TimeUnit.MILLISECONDS),
TimeUnit.MILLISECONDS));
RatisHelper.setFirstElectionTimeoutDuration(
ozoneConf, properties, ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public void init(OzoneConfiguration conf,
configurator.setUpgradeFinalizationExecutor(executor);

conf.setInt(HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION, HDDSLayoutFeature.INITIAL_VERSION.layoutVersion());
conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "5s");

MiniOzoneHAClusterImpl.Builder clusterBuilder = MiniOzoneCluster.newHABuilder(conf);
clusterBuilder.setNumOfStorageContainerManagers(NUM_SCMS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ private static MiniOzoneCluster newCluster(boolean schemaV3)
DatanodeConfiguration dnConf =
ozoneConfig.getObject(DatanodeConfiguration.class);
dnConf.setFailedDataVolumesTolerated(1);
dnConf.setDiskCheckMinGap(Duration.ofSeconds(5));
dnConf.setDiskCheckMinGap(Duration.ofSeconds(2));
ozoneConfig.setFromObject(dnConf);
MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(ozoneConfig)
.setNumDatanodes(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ static void init() throws Exception {
reconContainerManager = reconScm.getContainerManager();

LambdaTestUtils.await(60000, 5000,
() -> (reconPipelineManager.getPipelines().size() >= 4));
() -> (reconPipelineManager.getPipelines().size() >= scmPipelineManager.getPipelines().size()));

// Verify that Recon has all the pipelines from SCM.
scmPipelineManager.getPipelines().forEach(p -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ protected void configureSCM() {
"3s");
conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort());
conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort());
conf.setIfUnset(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s");
}

private void configureOM() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_CLIENT_ADDRESS_KEY;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL;
import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_DU_RESERVED;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY;
Expand All @@ -31,6 +33,7 @@
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATASTREAM_PORT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_IPC_PORT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SERVER_PORT;
import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY;
import static org.apache.ozone.test.GenericTestUtils.PortAllocator.anyHostWithFreePort;
import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort;

Expand Down Expand Up @@ -114,7 +117,9 @@ public OzoneConfiguration apply(OzoneConfiguration conf) throws IOException {
if (currentVersion != null) {
dnConf.setInt(TESTING_DATANODE_VERSION_CURRENT, currentVersion.toProtoValue());
}

dnConf.set(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, "1s");
dnConf.set(HDDS_INITIAL_HEARTBEAT_INTERVAL, "500ms");
dnConf.set(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, "500ms");
return dnConf;
}

Expand Down