diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 370c480315d..c76cafda55c 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -27,10 +27,18 @@ public final class HddsConfigKeys { "hdds.heartbeat.interval"; public static final String HDDS_HEARTBEAT_INTERVAL_DEFAULT = "30s"; + public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL = + "hdds.initial.heartbeat.interval"; + public static final String HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT = + "2s"; public static final String HDDS_RECON_HEARTBEAT_INTERVAL = "hdds.recon.heartbeat.interval"; public static final String HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT = "60s"; + public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL = + "hdds.recon.initial.heartbeat.interval"; + public static final String HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT = + "2s"; public static final String HDDS_NODE_REPORT_INTERVAL = "hdds.node.report.interval"; public static final String HDDS_NODE_REPORT_INTERVAL_DEFAULT = diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java index bcff6e6071a..4d89d4fd2cd 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/RatisHelper.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.TimeUnit; import java.util.function.BiFunction; import java.util.function.BooleanSupplier; import java.util.stream.Collectors; @@ -66,6 +67,7 @@ import org.apache.ratis.retry.RetryPolicy; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.rpc.SupportedRpcType; +import 
org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.apache.ratis.thirdparty.io.netty.buffer.ByteBuf; import org.apache.ratis.util.JavaUtils; @@ -645,4 +647,16 @@ public static int calculateAttempts(Duration pollInterval, Duration maxDuration) return (int) (max / interval); } + + public static void setFirstElectionTimeoutDuration( + ConfigurationSource conf, RaftProperties properties, String configKey) { + long firstElectionTimeout = conf.getTimeDuration(configKey, -1, TimeUnit.MILLISECONDS); + if (firstElectionTimeout > 0) { + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMin( + properties, TimeDuration.valueOf(firstElectionTimeout, TimeUnit.MILLISECONDS)); + RaftServerConfigKeys.Rpc.setFirstElectionTimeoutMax( + properties, TimeDuration.valueOf(firstElectionTimeout + 200, TimeUnit.MILLISECONDS)); + } + } + } diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java index c2f79a78657..34848a9d5f7 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java @@ -635,6 +635,10 @@ public final class ScmConfigKeys { public static final String HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL = "hdds.container.ratis.statemachine.write.wait.interval"; public static final long HDDS_CONTAINER_RATIS_STATEMACHINE_WRITE_WAIT_INTERVAL_NS_DEFAULT = 10 * 60 * 1000_000_000L; + + public static final String OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT + = "ozone.scm.ha.raft.server.rpc.first-election.timeout"; + /** * Never constructed. 
*/ diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java index 5dc4e15fc0d..4db78e644ea 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConfigKeys.java @@ -415,6 +415,8 @@ public final class OzoneConfigKeys { ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_KEY; public static final long HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT = ScmConfigKeys.HDDS_RATIS_SNAPSHOT_THRESHOLD_DEFAULT; + public static final String HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY = + "hdds.ratis.leader.first.election.minimum.timeout.duration"; public static final String HDDS_DATANODE_PLUGINS_KEY = "hdds.datanode.plugins"; diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index e2a7b293556..7e76cf68c72 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -347,6 +347,14 @@ Default is 5s. + + hdds.ratis.leader.first.election.minimum.timeout.duration + + OZONE, RATIS, MANAGEMENT + Ratis minimum timeout for the first election of a leader. + If not configured, falls back to hdds.ratis.leader.election.minimum.timeout.duration. + + hdds.node.report.interval 60000ms @@ -1154,6 +1162,14 @@ if the default value for this config is not used. + + hdds.initial.heartbeat.interval + 2s + OZONE, MANAGEMENT + + Heartbeat interval used during Datanode initialization for SCM. + + hdds.recon.heartbeat.interval 60s @@ -1162,6 +1178,14 @@ The heartbeat interval from a Datanode to Recon. + + hdds.recon.initial.heartbeat.interval + 2s + OZONE, MANAGEMENT, RECON + + Heartbeat interval used during Datanode initialization for Recon. + + ozone.scm.heartbeat.log.warn.interval.count 10 @@ -3834,6 +3858,13 @@ election. Default is 1s. 
+ ozone.scm.ha.raft.server.rpc.first-election.timeout + + SCM, OZONE, HA, RATIS + Ratis timeout for the first election of a leader. + If not configured, falls back to ozone.scm.ha.ratis.leader.election.timeout. + + ozone.scm.ha.ratis.server.leaderelection.pre-vote true SCM, OZONE, HA, RATIS diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java index 91cfaa5a21a..4060902dd22 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java @@ -18,9 +18,11 @@ package org.apache.hadoop.ozone.container.common.statemachine; import static java.lang.Math.min; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.getInitialReconHeartbeatInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getLogWarnInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getReconHeartbeatInterval; import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmHeartbeatInterval; +import static org.apache.hadoop.hdds.utils.HddsServerUtil.getScmInitialHeartbeatInterval; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -149,9 +151,9 @@ public class StateContext { * real HB frequency after scm registration. With this method the * initial registration could be significant faster. 
*/ - private final AtomicLong heartbeatFrequency = new AtomicLong(2000); + private final AtomicLong heartbeatFrequency; - private final AtomicLong reconHeartbeatFrequency = new AtomicLong(2000); + private final AtomicLong reconHeartbeatFrequency; private final int maxCommandQueueLimit; @@ -192,6 +194,8 @@ public StateContext(ConfigurationSource conf, fullReportTypeList = new ArrayList<>(); type2Reports = new HashMap<>(); this.threadNamePrefix = threadNamePrefix; + heartbeatFrequency = new AtomicLong(getScmInitialHeartbeatInterval(conf)); + reconHeartbeatFrequency = new AtomicLong(getInitialReconHeartbeatInterval(conf)); initReportTypeCollection(); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java index ced987c814b..0170a802c94 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_LOG_APPENDER_QUEUE_NUM_ELEMENTS_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_DEFAULT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SEGMENT_SIZE_KEY; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; import static org.apache.ratis.util.Preconditions.assertTrue; import com.google.common.annotations.VisibleForTesting; @@ -387,6 +388,8 @@ private void setRatisLeaderElectionTimeout(RaftProperties properties) { leaderElectionMinTimeout.toLong(TimeUnit.MILLISECONDS) + 200; 
RaftServerConfigKeys.Rpc.setTimeoutMax(properties, TimeDuration.valueOf(leaderElectionMaxTimeout, TimeUnit.MILLISECONDS)); + RatisHelper.setFirstElectionTimeoutDuration( + conf, properties, HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY); } private void setTimeoutForRetryCache(RaftProperties properties) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java index eeb65cb48ea..a48a7a33a7d 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java @@ -19,8 +19,12 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_HEARTBEAT_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdds.HddsUtils.getHostNameFromConfigKeys; import static org.apache.hadoop.hdds.HddsUtils.getPortNumberFromConfigKeys; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; @@ -266,6 +270,19 @@ public static long getScmHeartbeatInterval(ConfigurationSource conf) { HDDS_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); } + + /** + * Heartbeat Interval - Defines the initial heartbeat frequency from a datanode to + * SCM. 
+ * + * @param conf - Ozone Config + * @return - HB interval in milliseconds. + */ + public static long getScmInitialHeartbeatInterval(ConfigurationSource conf) { + return conf.getTimeDuration(HDDS_INITIAL_HEARTBEAT_INTERVAL, + HDDS_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } + /** * Heartbeat Interval - Defines the heartbeat frequency from a datanode to * Recon. @@ -278,6 +295,18 @@ public static long getReconHeartbeatInterval(ConfigurationSource conf) { HDDS_RECON_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); } + /** + * Heartbeat Interval - Defines the initial heartbeat frequency from a datanode to + * Recon. + * + * @param conf - Ozone Config + * @return - HB interval in milliseconds. + */ + public static long getInitialReconHeartbeatInterval(ConfigurationSource conf) { + return conf.getTimeDuration(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, + HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + } + /** * Get the Stale Node interval, which is used by SCM to flag a datanode as * stale, if the heartbeat from that node has been missing for this duration. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java index 0d65fff1d24..456d020f9fb 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java @@ -138,6 +138,8 @@ private static void setRaftRpcProperties(final RaftProperties properties, OZONE_SCM_HA_RATIS_NODE_FAILURE_TIMEOUT_DEFAULT, TimeUnit.MILLISECONDS), TimeUnit.MILLISECONDS)); + RatisHelper.setFirstElectionTimeoutDuration( + ozoneConf, properties, ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT); } /** diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java index a373075e115..1d24c559c56 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/upgrade/TestScmHAFinalization.java @@ -88,6 +88,7 @@ public void init(OzoneConfiguration conf, configurator.setUpgradeFinalizationExecutor(executor); conf.setInt(HDDS_SCM_INIT_DEFAULT_LAYOUT_VERSION, HDDSLayoutFeature.INITIAL_VERSION.layoutVersion()); + conf.set(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "5s"); MiniOzoneHAClusterImpl.Builder clusterBuilder = MiniOzoneCluster.newHABuilder(conf); clusterBuilder.setNumOfStorageContainerManagers(NUM_SCMS) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java index d653facf6ed..fdea9054ce7 100644 --- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/dn/volume/TestDatanodeHddsVolumeFailureDetection.java @@ -266,7 +266,7 @@ private static MiniOzoneCluster newCluster(boolean schemaV3) DatanodeConfiguration dnConf = ozoneConfig.getObject(DatanodeConfiguration.class); dnConf.setFailedDataVolumesTolerated(1); - dnConf.setDiskCheckMinGap(Duration.ofSeconds(5)); + dnConf.setDiskCheckMinGap(Duration.ofSeconds(2)); ozoneConfig.setFromObject(dnConf); MiniOzoneCluster cluster = MiniOzoneCluster.newBuilder(ozoneConfig) .setNumDatanodes(1) diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java index e78aa090df9..2ce2a18691e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/recon/TestReconAndAdminContainerCLI.java @@ -149,7 +149,7 @@ static void init() throws Exception { reconContainerManager = reconScm.getContainerManager(); LambdaTestUtils.await(60000, 5000, - () -> (reconPipelineManager.getPipelines().size() >= 4)); + () -> (reconPipelineManager.getPipelines().size() >= scmPipelineManager.getPipelines().size())); // Verify that Recon has all the pipelines from SCM. 
scmPipelineManager.getPipelines().forEach(p -> { diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 4a154ef2763..cef2724de2a 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -774,6 +774,7 @@ protected void configureSCM() { "3s"); conf.setInt(ScmConfigKeys.OZONE_SCM_RATIS_PORT_KEY, getFreePort()); conf.setInt(ScmConfigKeys.OZONE_SCM_GRPC_PORT_KEY, getFreePort()); + conf.setIfUnset(ScmConfigKeys.OZONE_SCM_HA_RATIS_SERVER_RPC_FIRST_ELECTION_TIMEOUT, "1s"); } private void configureOM() { diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java index 328e8a9692c..fed26e64138 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/UniformDatanodesFactory.java @@ -19,6 +19,8 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_CLIENT_ADDRESS_KEY; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DATANODE_HTTP_ADDRESS_KEY; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_INITIAL_HEARTBEAT_INTERVAL; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.OZONE_METADATA_DIRS; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_DU_RESERVED; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.HDDS_DATANODE_DIR_KEY; @@ -31,6 +33,7 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_DATASTREAM_PORT; import static 
org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_IPC_PORT; import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_CONTAINER_RATIS_SERVER_PORT; +import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.anyHostWithFreePort; import static org.apache.ozone.test.GenericTestUtils.PortAllocator.getFreePort; @@ -114,7 +117,9 @@ public OzoneConfiguration apply(OzoneConfiguration conf) throws IOException { if (currentVersion != null) { dnConf.setInt(TESTING_DATANODE_VERSION_CURRENT, currentVersion.toProtoValue()); } - + dnConf.set(HDDS_RATIS_LEADER_FIRST_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY, "1s"); + dnConf.set(HDDS_INITIAL_HEARTBEAT_INTERVAL, "500ms"); + dnConf.set(HDDS_RECON_INITIAL_HEARTBEAT_INTERVAL, "500ms"); return dnConf; }