From d6b1bb2692ba8c46cdf8ceed7148117402c7ea31 Mon Sep 17 00:00:00 2001 From: Nandakumar Vadivelu Date: Sat, 3 May 2025 08:15:33 +0530 Subject: [PATCH 1/4] HDDS-12949. Clenaup SCMSafeModeManager. --- .../apache/hadoop/hdds/scm/ha/SCMContext.java | 2 +- .../hdds/scm/safemode/SCMSafeModeManager.java | 300 ++++++------------ .../hdds/scm/safemode/SafeModeExitRule.java | 4 +- .../hdds/scm/safemode/SafeModeMetrics.java | 19 +- .../scm/safemode/SafeModeRuleFactory.java | 5 + .../scm/server/StorageContainerManager.java | 9 +- .../hdds/scm/block/TestBlockManager.java | 14 +- .../hdds/scm/ha/TestBackgroundSCMService.java | 2 +- .../hadoop/hdds/scm/ha/TestSCMContext.java | 4 +- .../hdds/scm/ha/TestSCMServiceManager.java | 8 +- .../scm/pipeline/TestPipelineManagerImpl.java | 18 +- .../safemode/TestDataNodeSafeModeRule.java | 10 +- .../TestHealthyPipelineSafeModeRule.java | 31 +- .../TestOneReplicaPipelineSafeModeRule.java | 8 +- .../scm/safemode/TestSCMSafeModeManager.java | 89 +++--- .../TestSCMSafeModeWithPipelineRules.java | 10 +- .../hadoop/ozone/MiniOzoneClusterImpl.java | 5 +- .../hadoop/ozone/MiniOzoneHAClusterImpl.java | 5 +- 18 files changed, 226 insertions(+), 317 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java index 50aafe189edb..eca34e6b6ec6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMContext.java @@ -72,7 +72,7 @@ public final class SCMContext { private SCMContext(Builder b) { isLeader = b.isLeader; term = b.term; - safeModeStatus = new SafeModeStatus(b.isInSafeMode, b.isPreCheckComplete); + safeModeStatus = SafeModeStatus.of(b.isInSafeMode, b.isPreCheckComplete); finalizationCheckpoint = b.finalizationCheckpoint; scm = b.scm; threadNamePrefix = b.threadNamePrefix; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java index 248103a48ead..811e533a72b7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java @@ -17,24 +17,22 @@ package org.apache.hadoop.hdds.scm.safemode; -import com.google.common.annotations.VisibleForTesting; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT; + import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.scm.container.ContainerManager; -import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.ha.SCMContext; import org.apache.hadoop.hdds.scm.ha.SCMService.Event; import org.apache.hadoop.hdds.scm.ha.SCMServiceManager; import org.apache.hadoop.hdds.scm.node.NodeManager; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; -import org.apache.hadoop.hdds.server.events.EventPublisher; import org.apache.hadoop.hdds.server.events.EventQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,124 +40,88 @@ /** * StorageContainerManager enters safe mode on startup to allow system to * reach a stable state before becoming fully functional. SCM will wait - * for certain resources to be reported before coming out of safe mode. - * - * SafeModeExitRule defines format to define new rules which must be satisfied - * to exit Safe mode. + * for certain resources to be reported before coming out of safe mode.

* - * Current SafeMode rules: - * 1. ContainerSafeModeRule: - * On every new datanode registration, it fires - * {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this - * event. This rule process this report, increment the - * containerWithMinReplicas count when this reported replica is in the - * containerMap. Then validates if cutoff threshold for containers is meet. + * Set of {@link SafeModeExitRule} are defined to verify if the required + * resources are reported, so that SCM can come out of safemode.

* - * 2. DatanodeSafeModeRule: - * On every new datanode registration, it fires - * {@link SCMEvents#NODE_REGISTRATION_CONT_REPORT}. This rule handles this - * event. This rule process this report, and check if this is new node, add - * to its reported node list. Then validate it cutoff threshold for minimum - * number of datanode registered is met or not. + * There are two stages in safemode exit, + *

+ *
+ * Each {@link SafeModeExitRule} can be configured to be part of either + * {@code pre-check}, {@code safemode} or both.

* - * 3. HealthyPipelineSafeModeRule: - * Once the PipelineReportHandler processes the - * {@link SCMEvents#PIPELINE_REPORT}, it fires - * {@link SCMEvents#OPEN_PIPELINE}. This rule handles this - * event. This rule processes this report, and check if pipeline is healthy - * and increments current healthy pipeline count. Then validate it cutoff - * threshold for healthy pipeline is met or not. - * - * 4. OneReplicaPipelineSafeModeRule: - * Once the PipelineReportHandler processes the - * {@link SCMEvents#PIPELINE_REPORT}, it fires - * {@link SCMEvents#OPEN_PIPELINE}. This rule handles this - * event. This rule processes this report, and add the reported pipeline to - * reported pipeline set. Then validate it cutoff threshold for one replica - * per pipeline is met or not. + * Note: The Safemode logic can be completely disabled using + * {@link org.apache.hadoop.hdds.HddsConfigKeys#HDDS_SCM_SAFEMODE_ENABLED} property + *

* + * @see SafeModeExitRule + * @see DataNodeSafeModeRule + * @see HealthyPipelineSafeModeRule + * @see OneReplicaPipelineSafeModeRule + * @see RatisContainerSafeModeRule + * @see ECContainerSafeModeRule */ public class SCMSafeModeManager implements SafeModeManager { - private static final Logger LOG = - LoggerFactory.getLogger(SCMSafeModeManager.class); - private final boolean isSafeModeEnabled; - private AtomicBoolean inSafeMode = new AtomicBoolean(true); - private AtomicBoolean preCheckComplete = new AtomicBoolean(false); - private AtomicBoolean forceExitSafeMode = new AtomicBoolean(false); - - private Map exitRules = new HashMap<>(1); - private Set preCheckRules = new HashSet<>(1); - private ConfigurationSource config; - private static final String RATIS_CONTAINER_EXIT_RULE = "RatisContainerSafeModeRule"; - private static final String EC_CONTAINER_EXIT_RULE = "ECContainerSafeModeRule"; - private static final String DN_EXIT_RULE = "DataNodeSafeModeRule"; - private static final String HEALTHY_PIPELINE_EXIT_RULE = - "HealthyPipelineSafeModeRule"; - private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE = - "AtleastOneDatanodeReportedRule"; - - private Set validatedRules = new HashSet<>(); - private Set validatedPreCheckRules = new HashSet<>(1); - - private final EventQueue eventPublisher; + private static final Logger LOG = LoggerFactory.getLogger(SCMSafeModeManager.class); + + private final AtomicBoolean inSafeMode = new AtomicBoolean(true); + private final AtomicBoolean preCheckComplete = new AtomicBoolean(false); + private final AtomicBoolean forceExitSafeMode = new AtomicBoolean(false); + private final Map> exitRules = new HashMap<>(); + private final Set preCheckRules = new HashSet<>(); + private final Set validatedRules = new HashSet<>(); + private final Set validatedPreCheckRules = new HashSet<>(); + private final SCMServiceManager serviceManager; private final SCMContext scmContext; - private final SafeModeMetrics safeModeMetrics; - public SCMSafeModeManager(ConfigurationSource conf, - ContainerManager containerManager, PipelineManager pipelineManager, - NodeManager nodeManager, EventQueue eventQueue, - SCMServiceManager serviceManager, SCMContext scmContext) { - this.config = conf; - this.eventPublisher = eventQueue; + public SCMSafeModeManager(final ConfigurationSource conf, + final NodeManager nodeManager, + final PipelineManager pipelineManager, + final ContainerManager containerManager, + final SCMServiceManager serviceManager, + final EventQueue eventQueue, + final SCMContext scmContext) { this.serviceManager = serviceManager; this.scmContext = scmContext; - this.isSafeModeEnabled = conf.getBoolean( - HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED, - HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT); - - if (isSafeModeEnabled) { - this.safeModeMetrics = SafeModeMetrics.create(); - - // TODO: Remove the cyclic ("this") dependency (HDDS-11797) - SafeModeRuleFactory.initialize(config, scmContext, eventQueue, - this, pipelineManager, containerManager, nodeManager); - SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance(); + this.safeModeMetrics = SafeModeMetrics.create(); - exitRules = factory.getSafeModeRules().stream().collect( - Collectors.toMap(SafeModeExitRule::getRuleName, rule -> rule)); + // TODO: Remove the cyclic ("this") dependency (HDDS-11797) + SafeModeRuleFactory.initialize(conf, scmContext, eventQueue, this, + pipelineManager, containerManager, nodeManager); + SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance(); + factory.getSafeModeRules().forEach(rule -> exitRules.put(rule.getRuleName(), rule)); + factory.getPreCheckRules().forEach(rule -> preCheckRules.add(rule.getRuleName())); - preCheckRules = factory.getPreCheckRules().stream() - .map(SafeModeExitRule::getRuleName).collect(Collectors.toSet()); - } else { - this.safeModeMetrics = null; - exitSafeMode(eventQueue, true); + final boolean isSafeModeEnabled = conf.getBoolean(HDDS_SCM_SAFEMODE_ENABLED, HDDS_SCM_SAFEMODE_ENABLED_DEFAULT); + safeModeMetrics.setSafemodeEnabled(isSafeModeEnabled); + if (!isSafeModeEnabled) { + LOG.info("Safemode is disabled, skipping Safemode rule validation and force exiting Safemode."); + exitSafeMode(true); } } + public void start() { + emitSafeModeStatus(); + } + public void stop() { - if (isSafeModeEnabled) { - this.safeModeMetrics.unRegister(); - } + safeModeMetrics.unRegister(); } public SafeModeMetrics getSafeModeMetrics() { return safeModeMetrics; } - /** - * Emit Safe mode status. - */ - @VisibleForTesting - public void emitSafeModeStatus() { - SafeModeStatus safeModeStatus = - new SafeModeStatus(getInSafeMode(), getPreCheckComplete()); - - safeModeStatus.setForceExitSafeMode(isForceExitSafeMode()); - - // update SCMContext + private void emitSafeModeStatus() { + final SafeModeStatus safeModeStatus = SafeModeStatus.of( + getInSafeMode(), getPreCheckComplete(), isForceExitSafeMode()); scmContext.updateSafeModeStatus(safeModeStatus); // notify SCMServiceManager @@ -173,34 +135,27 @@ public void emitSafeModeStatus() { } } - public synchronized void validateSafeModeExitRules(String ruleName, - EventPublisher eventQueue) { - - if (exitRules.get(ruleName) != null) { - boolean added = validatedRules.add(ruleName); + public synchronized void validateSafeModeExitRules(String ruleName) { + if (exitRules.containsKey(ruleName)) { + validatedRules.add(ruleName); + LOG.info("{} rule is successfully validated", ruleName); if (preCheckRules.contains(ruleName)) { validatedPreCheckRules.add(ruleName); } - if (added) { - LOG.info("{} rule is successfully validated", ruleName); - } } else { // This should never happen LOG.error("No Such Exit rule {}", ruleName); } if (!getPreCheckComplete()) { - if (validatedPreCheckRules.size() == preCheckRules.size()) { - completePreCheck(eventQueue); - } + completePreCheck(); } if (validatedRules.size() == exitRules.size()) { // All rules are satisfied, we can exit safe mode. LOG.info("ScmSafeModeManager, all rules are successfully validated"); - exitSafeMode(eventQueue, false); + exitSafeMode(false); } - } /** @@ -209,33 +164,27 @@ public synchronized void validateSafeModeExitRules(String ruleName, * the safemode state change. * @param eventQueue */ - @VisibleForTesting - public void completePreCheck(EventPublisher eventQueue) { - LOG.info("All SCM safe mode pre check rules have passed"); - setPreCheckComplete(true); - emitSafeModeStatus(); + private void completePreCheck() { + if (validatedPreCheckRules.size() == preCheckRules.size()) { + LOG.info("All SCM safe mode pre check rules have passed"); + preCheckComplete.set(true); + emitSafeModeStatus(); + } } - /** - * Exit safe mode. It does following actions: - * 1. Set safe mode status to false. - * 2. Emits START_REPLICATION for ReplicationManager. - * 3. Cleanup resources. - * 4. Emit safe mode status. - * @param eventQueue - * @param force - */ - @VisibleForTesting - public void exitSafeMode(EventPublisher eventQueue, boolean force) { + public void forceExitSafeMode() { + exitSafeMode(true); + } + + private void exitSafeMode(boolean force) { LOG.info("SCM exiting safe mode."); - // If safemode is exiting, then pre check must also have passed so - // set it to true. - setPreCheckComplete(true); - setInSafeMode(false); - setForceExitSafeMode(force); + // If safemode is exiting, then pre-check must also have passed. + preCheckComplete.set(true); + inSafeMode.set(false); + forceExitSafeMode.set(force); // TODO: Remove handler registration as there is no need to listen to - // register events anymore. + // register events anymore. emitSafeModeStatus(); } @@ -262,7 +211,7 @@ public void refreshAndValidate() { exitRules.values().forEach(rule -> { rule.refresh(false); if (rule.validate() && inSafeMode.get()) { - validateSafeModeExitRules(rule.getRuleName(), eventPublisher); + validateSafeModeExitRules(rule.getRuleName()); rule.cleanup(); } }); @@ -271,20 +220,13 @@ public void refreshAndValidate() { @Override public boolean getInSafeMode() { - if (!isSafeModeEnabled) { - return false; - } return inSafeMode.get(); } - /** - * Get the safe mode status of all rules. - * - * @return map of rule statuses. - */ + /** Get the safe mode status of all rules. */ public Map> getRuleStatus() { Map> map = new HashMap<>(); - for (SafeModeExitRule exitRule : exitRules.values()) { + for (SafeModeExitRule exitRule : exitRules.values()) { map.put(exitRule.getRuleName(), Pair.of(exitRule.validate(), exitRule.getStatusText())); } @@ -295,80 +237,42 @@ public boolean getPreCheckComplete() { return preCheckComplete.get(); } - /** - * Set safe mode status. - */ - public void setInSafeMode(boolean inSafeMode) { - this.inSafeMode.set(inSafeMode); - } - - public void setPreCheckComplete(boolean newState) { - this.preCheckComplete.set(newState); - } - public boolean isForceExitSafeMode() { return forceExitSafeMode.get(); } - public void setForceExitSafeMode(boolean forceExitSafeMode) { - this.forceExitSafeMode.set(forceExitSafeMode); - } - public static Logger getLogger() { return LOG; } - @VisibleForTesting + // TODO: This will be removed by HDDS-12955 public double getCurrentContainerThreshold() { - return ((RatisContainerSafeModeRule) exitRules.get(RATIS_CONTAINER_EXIT_RULE)) - .getCurrentContainerThreshold(); - } - - @VisibleForTesting - public double getCurrentECContainerThreshold() { - return ((ECContainerSafeModeRule) exitRules.get(EC_CONTAINER_EXIT_RULE)) + return ((RatisContainerSafeModeRule) exitRules.get("RatisContainerSafeModeRule")) .getCurrentContainerThreshold(); } - @VisibleForTesting - public RatisContainerSafeModeRule getRatisContainerSafeModeRule() { - return (RatisContainerSafeModeRule) exitRules.get(RATIS_CONTAINER_EXIT_RULE); - } - - @VisibleForTesting - public ECContainerSafeModeRule getECContainerSafeModeRule() { - return (ECContainerSafeModeRule) exitRules.get(EC_CONTAINER_EXIT_RULE); - } - - @VisibleForTesting - public HealthyPipelineSafeModeRule getHealthyPipelineSafeModeRule() { - return (HealthyPipelineSafeModeRule) - exitRules.get(HEALTHY_PIPELINE_EXIT_RULE); - } - - @VisibleForTesting - public OneReplicaPipelineSafeModeRule getOneReplicaPipelineSafeModeRule() { - return (OneReplicaPipelineSafeModeRule) - exitRules.get(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE); - } - - public DataNodeSafeModeRule getDataNodeSafeModeRule() { - return (DataNodeSafeModeRule) exitRules.get(DN_EXIT_RULE); - } - /** * Class used during SafeMode status event. */ - public static class SafeModeStatus { + public static final class SafeModeStatus { + // TODO: forceExitSafeMode value is not used anywhere, check and remove (HDDS-12957). private final boolean safeModeStatus; private final boolean preCheckPassed; + private final boolean forceExitSafeMode; - private boolean forceExitSafeMode; - - public SafeModeStatus(boolean safeModeState, boolean preCheckPassed) { + private SafeModeStatus(boolean safeModeState, boolean preCheckPassed, boolean forceExitSafeMode) { this.safeModeStatus = safeModeState; this.preCheckPassed = preCheckPassed; + this.forceExitSafeMode = forceExitSafeMode; + } + + public static SafeModeStatus of(boolean safeMode, boolean preCheck) { + return of(safeMode, preCheck, false); + } + + public static SafeModeStatus of(boolean safeMode, boolean preCheck, boolean forceExit) { + return new SafeModeStatus(safeMode, preCheck, forceExit); } public boolean isInSafeMode() { @@ -379,10 +283,6 @@ public boolean isPreCheckComplete() { return preCheckPassed; } - public void setForceExitSafeMode(boolean forceExitSafeMode) { - this.forceExitSafeMode = forceExitSafeMode; - } - public boolean isForceExitSafeMode() { return forceExitSafeMode; } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java index 8b0456e68c8d..fb6f10b6f689 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeExitRule.java @@ -99,7 +99,7 @@ public final void onMessage(T report, EventPublisher publisher) { if (scmInSafeMode()) { if (validate()) { - safeModeManager.validateSafeModeExitRules(ruleName, publisher); + safeModeManager.validateSafeModeExitRules(ruleName); cleanup(); return; } @@ -107,7 +107,7 @@ public final void onMessage(T report, EventPublisher publisher) { process(report); if (validate()) { - safeModeManager.validateSafeModeExitRules(ruleName, publisher); + safeModeManager.validateSafeModeExitRules(ruleName); cleanup(); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java index d103e1f1fca0..ab1066f86908 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.safemode; +import org.apache.commons.lang3.mutable.MutableBoolean; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -28,9 +29,9 @@ * be used for monitoring during SCM startup when SCM is still in SafeMode. */ public class SafeModeMetrics { - private static final String SOURCE_NAME = - SafeModeMetrics.class.getSimpleName(); + private static final String SOURCE_NAME = SafeModeMetrics.class.getSimpleName(); + private @Metric MutableBoolean safemodeEnabled; // These all values will be set to some values when safemode is enabled. private @Metric MutableGaugeLong numContainerWithOneReplicaReportedThreshold; @@ -50,10 +51,16 @@ public class SafeModeMetrics { currentPipelinesWithAtleastOneReplicaReportedCount; public static SafeModeMetrics create() { - MetricsSystem ms = DefaultMetricsSystem.instance(); - return ms.register(SOURCE_NAME, - "SCM Safemode Metrics", - new SafeModeMetrics()); + final MetricsSystem ms = DefaultMetricsSystem.instance(); + return ms.register(SOURCE_NAME, "SCM Safemode Metrics", new SafeModeMetrics()); + } + + public void setSafemodeEnabled(boolean isSafemodeEnabled) { + this.safemodeEnabled = new MutableBoolean(isSafemodeEnabled); + } + + public MutableBoolean getSafemodeEnabled() { + return safemodeEnabled; } public void setNumHealthyPipelinesThreshold(long val) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java index 8d866d748adf..db7d2f9086b8 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java @@ -117,4 +117,9 @@ public List> getSafeModeRules() { public List> getPreCheckRules() { return preCheckRules; } + + public > T getSafeModeRule(Class ruleClass) { + return safeModeRules.stream().filter(r -> ruleClass.isAssignableFrom(r.getClass())) + .map(ruleClass::cast).findFirst().orElse(null); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 04bf80366ed2..d45cc6c1cc57 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -442,7 +442,7 @@ private StorageContainerManager(OzoneConfiguration conf, containerBalancer = new ContainerBalancer(this); // Emit initial safe mode status, as now handlers are registered. - scmSafeModeManager.emitSafeModeStatus(); + scmSafeModeManager.start(); scmHostName = HddsUtils.getHostName(conf); registerMXBean(); @@ -831,9 +831,8 @@ private void initializeSystemManagers(OzoneConfiguration conf, if (configurator.getScmSafeModeManager() != null) { scmSafeModeManager = configurator.getScmSafeModeManager(); } else { - scmSafeModeManager = new SCMSafeModeManager(conf, - containerManager, pipelineManager, scmNodeManager, eventQueue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(conf, scmNodeManager, pipelineManager, + containerManager, serviceManager, eventQueue, scmContext); } scmDecommissionManager = new NodeDecommissionManager(conf, scmNodeManager, containerManager, @@ -1972,7 +1971,7 @@ public SCMServiceManager getSCMServiceManager() { * Force SCM out of safe mode. */ public boolean exitSafeMode() { - scmSafeModeManager.exitSafeMode(eventQueue, true); + scmSafeModeManager.forceExitSafeMode(); return true; } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java index 282b92554d1e..5ed9cefb7bdf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestBlockManager.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hdds.scm.block; +import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED; import static org.apache.hadoop.ozone.OzoneConsts.GB; import static org.apache.hadoop.ozone.OzoneConsts.MB; import static org.assertj.core.api.Assertions.assertThat; @@ -115,7 +116,7 @@ void setUp(@TempDir File tempDir) throws Exception { ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT_DEFAULT); - + conf.setBoolean(HDDS_SCM_SAFEMODE_ENABLED, false); conf.setBoolean(HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, false); conf.setTimeDuration(HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL, 5, TimeUnit.SECONDS); @@ -160,12 +161,7 @@ void setUp(@TempDir File tempDir) throws Exception { new ContainerReplicaPendingOps( Clock.system(ZoneId.systemDefault()))); SCMSafeModeManager safeModeManager = new SCMSafeModeManager(conf, - containerManager, pipelineManager, nodeManager, eventQueue, serviceManager, scmContext) { - @Override - public void emitSafeModeStatus() { - // skip - } - }; + nodeManager, pipelineManager, containerManager, serviceManager, eventQueue, scmContext); SCMConfigurator configurator = new SCMConfigurator(); configurator.setScmNodeManager(nodeManager); configurator.setPipelineManager(pipelineManager); @@ -190,7 +186,7 @@ public void emitSafeModeStatus() { replicationConfig = RatisReplicationConfig .getInstance(ReplicationFactor.THREE); - scm.getScmContext().updateSafeModeStatus(new SafeModeStatus(false, true)); + scm.getScmContext().updateSafeModeStatus(SafeModeStatus.of(false, true)); } @AfterEach @@ -455,7 +451,7 @@ public void testAllocateOversizedBlock() { @Test public void testAllocateBlockFailureInSafeMode() { scm.getScmContext().updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, true)); + SCMSafeModeManager.SafeModeStatus.of(true, true)); // Test1: In safe mode expect an SCMException. Throwable t = assertThrows(IOException.class, () -> blockManager.allocateBlock(DEFAULT_BLOCK_SIZE, diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java index 86894eeb42a3..4a35daab0a51 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestBackgroundSCMService.java @@ -92,7 +92,7 @@ public void testNotifyStatusChanged() { // go into safe mode, RUNNING -> PAUSING scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, true)); + SCMSafeModeManager.SafeModeStatus.of(true, true)); backgroundSCMService.notifyStatusChanged(); assertFalse(backgroundSCMService.shouldRun()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java index 666d2034dad3..a1d34c82c36b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMContext.java @@ -62,12 +62,12 @@ public void testSafeModeOperations() { assertFalse(scmContext.isPreCheckComplete()); // in safe mode, pass preCheck - scmContext.updateSafeModeStatus(new SafeModeStatus(true, true)); + scmContext.updateSafeModeStatus(SafeModeStatus.of(true, true)); assertTrue(scmContext.isInSafeMode()); assertTrue(scmContext.isPreCheckComplete()); // out of safe mode - scmContext.updateSafeModeStatus(new SafeModeStatus(false, true)); + scmContext.updateSafeModeStatus(SafeModeStatus.of(false, true)); assertFalse(scmContext.isInSafeMode()); assertTrue(scmContext.isPreCheckComplete()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java index 04ceebd6c359..67ff3ee80c3b 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/ha/TestSCMServiceManager.java @@ -76,7 +76,7 @@ public void stop() { // PAUSING when out of safe mode. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(false, true)); + SCMSafeModeManager.SafeModeStatus.of(false, true)); serviceManager.notifyStatusChanged(); assertFalse(serviceRunWhenLeader.shouldRun()); @@ -87,7 +87,7 @@ public void stop() { // RUNNING when in safe mode. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, false)); + SCMSafeModeManager.SafeModeStatus.of(true, false)); serviceManager.notifyStatusChanged(); assertTrue(serviceRunWhenLeader.shouldRun()); @@ -146,7 +146,7 @@ public void stop() { // PAUSING when out of safe mode. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(false, true)); + SCMSafeModeManager.SafeModeStatus.of(false, true)); serviceManager.notifyStatusChanged(); assertFalse(serviceRunWhenLeaderAndOutOfSafeMode.shouldRun()); @@ -157,7 +157,7 @@ public void stop() { // PAUSING when in safe mode. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, false)); + SCMSafeModeManager.SafeModeStatus.of(true, false)); serviceManager.notifyStatusChanged(); assertFalse(serviceRunWhenLeaderAndOutOfSafeMode.shouldRun()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java index f232890c96b8..6e42523d56dd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineManagerImpl.java @@ -358,8 +358,8 @@ public void testClosePipelineShouldFailOnFollower() throws Exception { public void testPipelineReport() throws Exception { try (PipelineManagerImpl pipelineManager = createPipelineManager(true)) { SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(conf, - mock(ContainerManager.class), pipelineManager, mock(NodeManager.class), - new EventQueue(), serviceManager, scmContext); + mock(NodeManager.class), pipelineManager, mock(ContainerManager.class), + serviceManager, new EventQueue(), scmContext); Pipeline pipeline = pipelineManager .createPipeline(RatisReplicationConfig .getInstance(ReplicationFactor.THREE)); @@ -468,8 +468,8 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws Exception { pipelineManager.getPipeline(pipeline.getId()).getPipelineState()); SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(new OzoneConfiguration(), - mock(ContainerManager.class), pipelineManager, mock(NodeManager.class), - new EventQueue(), serviceManager, scmContext); + mock(NodeManager.class), pipelineManager, mock(ContainerManager.class), + serviceManager, new EventQueue(), scmContext); PipelineReportHandler pipelineReportHandler = new PipelineReportHandler(scmSafeModeManager, pipelineManager, SCMContext.emptyContext(), conf); @@ -612,7 +612,7 @@ public void testPipelineNotCreatedUntilSafeModePrecheck() throws Exception { TimeUnit.MILLISECONDS); scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, false)); + SCMSafeModeManager.SafeModeStatus.of(true, false)); PipelineManagerImpl pipelineManager = createPipelineManager(true); assertThrows(IOException.class, @@ -633,7 +633,7 @@ public void testPipelineNotCreatedUntilSafeModePrecheck() throws Exception { // Simulate safemode check exiting. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, true)); + SCMSafeModeManager.SafeModeStatus.of(true, true)); GenericTestUtils.waitFor(() -> !pipelineManager.getPipelines().isEmpty(), 100, 10000); pipelineManager.close(); @@ -649,20 +649,20 @@ public void testSafeModeUpdatedOnSafemodeExit() throws Exception { PipelineManagerImpl pipelineManager = createPipelineManager(true); scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, false)); + SCMSafeModeManager.SafeModeStatus.of(true, false)); assertTrue(pipelineManager.getSafeModeStatus()); assertFalse(pipelineManager.isPipelineCreationAllowed()); // First pass pre-check as true, but safemode still on // Simulate safemode check exiting. scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(true, true)); + SCMSafeModeManager.SafeModeStatus.of(true, true)); assertTrue(pipelineManager.getSafeModeStatus()); assertTrue(pipelineManager.isPipelineCreationAllowed()); // Then also turn safemode off scmContext.updateSafeModeStatus( - new SCMSafeModeManager.SafeModeStatus(false, true)); + SCMSafeModeManager.SafeModeStatus.of(false, true)); assertFalse(pipelineManager.getSafeModeStatus()); assertTrue(pipelineManager.isPipelineCreationAllowed()); pipelineManager.close(); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java index 847da184fab0..eab3f1cf2b88 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestDataNodeSafeModeRule.java @@ -76,13 +76,13 @@ private void setup(int requiredDns) throws Exception { serviceManager = new SCMServiceManager(); scmContext = SCMContext.emptyContext(); - SCMSafeModeManager scmSafeModeManager = - new SCMSafeModeManager(ozoneConfiguration, containerManager, - null, nodeManager, eventQueue, serviceManager, scmContext); + SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(ozoneConfiguration, + nodeManager, null, containerManager, serviceManager, eventQueue, scmContext); + scmSafeModeManager.start(); - rule = scmSafeModeManager.getDataNodeSafeModeRule(); + rule = SafeModeRuleFactory.getInstance().getSafeModeRule(DataNodeSafeModeRule.class); assertNotNull(rule); - + rule.setValidateBasedOnReportProcessing(true); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java index 8ffdba407feb..9f1b30c51d83 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestHealthyPipelineSafeModeRule.java @@ -94,12 +94,12 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines() pipelineManager.getStateManager(), config); pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS, mockRatisProvider); - SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, eventQueue, - serviceManager, scmContext); + SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config, + nodeManager, pipelineManager, containerManager, serviceManager, eventQueue, scmContext); + scmSafeModeManager.start(); - HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = - scmSafeModeManager.getHealthyPipelineSafeModeRule(); + HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); // This should be immediately satisfied, as no pipelines are there yet. assertTrue(healthyPipelineSafeModeRule.validate()); @@ -172,12 +172,12 @@ public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception { pipeline3 = pipelineManager.getPipeline(pipeline3.getId()); MockRatisPipelineProvider.markPipelineHealthy(pipeline3); - SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, eventQueue, - serviceManager, scmContext); + SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config, + nodeManager, pipelineManager, containerManager, serviceManager, eventQueue, scmContext); + scmSafeModeManager.start(); - HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = - scmSafeModeManager.getHealthyPipelineSafeModeRule(); + HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); // No datanodes have sent pipelinereport from datanode assertFalse(healthyPipelineSafeModeRule.validate()); @@ -265,13 +265,12 @@ public void testHealthyPipelineSafeModeRuleWithMixedPipelines() pipeline3 = pipelineManager.getPipeline(pipeline3.getId()); MockRatisPipelineProvider.markPipelineHealthy(pipeline3); - SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, eventQueue, - serviceManager, scmContext); - - HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = - scmSafeModeManager.getHealthyPipelineSafeModeRule(); + SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(config, + nodeManager, pipelineManager, containerManager, serviceManager, eventQueue, scmContext); + scmSafeModeManager.start(); + HealthyPipelineSafeModeRule healthyPipelineSafeModeRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); // No pipeline event have sent to SCMSafemodeManager assertFalse(healthyPipelineSafeModeRule.validate()); diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java index 8d5e005ae02e..531bfa549abf 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestOneReplicaPipelineSafeModeRule.java @@ -115,11 +115,11 @@ private void setup(int nodes, int pipelineFactorThreeCount, createPipelines(pipelineFactorOneCount, HddsProtos.ReplicationFactor.ONE); - SCMSafeModeManager scmSafeModeManager = - new SCMSafeModeManager(ozoneConfiguration, containerManager, - pipelineManager, mockNodeManager, eventQueue, serviceManager, scmContext); + SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(ozoneConfiguration, + mockNodeManager, pipelineManager, containerManager, serviceManager, eventQueue, scmContext); + scmSafeModeManager.start(); - rule = scmSafeModeManager.getOneReplicaPipelineSafeModeRule(); + rule = SafeModeRuleFactory.getInstance().getSafeModeRule(OneReplicaPipelineSafeModeRule.class); } @Test diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index b00f5b4ea5c5..9f15c37b124f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -130,9 +130,9 @@ private void testSafeMode(int numContainers) throws Exception { } ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, null, null, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, null, null, containerManager, + serviceManager, queue, scmContext); + scmSafeModeManager.start(); assertTrue(scmSafeModeManager.getInSafeMode()); validateRuleStatus("DatanodeSafeModeRule", "registered datanodes 0"); @@ -169,9 +169,9 @@ public void testSafeModeExitRule() throws Exception { } ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, null, null, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, null, null, containerManager, + serviceManager, queue, scmContext); + scmSafeModeManager.start(); long cutOff = (long) Math.ceil(numContainers * config.getDouble( HddsConfigKeys.HDDS_SCM_SAFEMODE_THRESHOLD_PCT, @@ -235,8 +235,8 @@ public void testHealthyPipelinePercentWithIncorrectValue(double healthyPercent, ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, - () -> new SCMSafeModeManager(conf, containerManager, - pipelineManager, mockNodeManager, queue, serviceManager, scmContext)); + () -> new SCMSafeModeManager(conf, mockNodeManager, pipelineManager, containerManager, + serviceManager, queue, scmContext)); assertThat(exception).hasMessageEndingWith("value should be >= 0.0 and <= 1.0"); } @@ -301,9 +301,9 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - conf, containerManager, pipelineManager, mockNodeManager, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(conf, mockNodeManager, pipelineManager, + containerManager, serviceManager, queue, scmContext); + scmSafeModeManager.start(); assertTrue(scmSafeModeManager.getInSafeMode()); if (healthyPipelinePercent > 0) { @@ -317,12 +317,10 @@ public void testSafeModeExitRuleWithPipelineAvailabilityCheck( List pipelines = pipelineManager.getPipelines(); - int healthyPipelineThresholdCount = - scmSafeModeManager.getHealthyPipelineSafeModeRule() - .getHealthyPipelineThresholdCount(); - int oneReplicaThresholdCount = - scmSafeModeManager.getOneReplicaPipelineSafeModeRule() - .getThresholdCount(); + int healthyPipelineThresholdCount = SafeModeRuleFactory.getInstance(). + getSafeModeRule(HealthyPipelineSafeModeRule.class).getHealthyPipelineThresholdCount(); + int oneReplicaThresholdCount = SafeModeRuleFactory.getInstance(). + getSafeModeRule(OneReplicaPipelineSafeModeRule.class).getThresholdCount(); assertEquals(healthyPipelineThresholdCount, scmSafeModeManager.getSafeModeMetrics() @@ -388,15 +386,15 @@ private void validateRuleStatus(String safeModeRule, String stringToMatch) { } private void checkHealthy(int expectedCount) throws Exception { - GenericTestUtils.waitFor(() -> scmSafeModeManager - .getHealthyPipelineSafeModeRule() + GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class) .getCurrentHealthyPipelineCount() == expectedCount, 100, 5000); } private void checkOpen(int expectedCount) throws Exception { - GenericTestUtils.waitFor(() -> scmSafeModeManager - .getOneReplicaPipelineSafeModeRule() + GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() + .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) .getCurrentReportedPipelineCount() == expectedCount, 1000, 5000); } @@ -437,9 +435,8 @@ public void testDisableSafeMode() { ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); NodeManager nodeManager = mock(SCMNodeManager.class); - scmSafeModeManager = new SCMSafeModeManager( - conf, containerManager, pipelineManager, nodeManager, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(conf, nodeManager, pipelineManager, + containerManager, serviceManager, queue, scmContext); assertFalse(scmSafeModeManager.getInSafeMode()); } @@ -479,8 +476,8 @@ public void testContainerSafeModeRule() throws Exception { ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, null, null, queue, serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, null, null, + containerManager, serviceManager, queue, scmContext); assertTrue(scmSafeModeManager.getInSafeMode()); @@ -555,9 +552,8 @@ public void testContainerSafeModeRuleEC(int data, int parity) throws Exception { scmMetadataStore.getContainerTable(), new ContainerReplicaPendingOps(Clock.system(ZoneId.systemDefault()))); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, nodeManager, pipelineManager, + containerManager, serviceManager, queue, scmContext); assertTrue(scmSafeModeManager.getInSafeMode()); // Only 20 containers are involved in the calculation, @@ -569,12 +565,12 @@ public void testContainerSafeModeRuleEC(int data, int parity) throws Exception { // the threshold will reach 100%. testECContainerThreshold(containers.subList(10, 20), 1.0, data); - ECContainerSafeModeRule ecContainerSafeModeRule = - scmSafeModeManager.getECContainerSafeModeRule(); + ECContainerSafeModeRule ecContainerSafeModeRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(ECContainerSafeModeRule.class); assertTrue(ecContainerSafeModeRule.validate()); - RatisContainerSafeModeRule ratisContainerSafeModeRule = - scmSafeModeManager.getRatisContainerSafeModeRule(); + RatisContainerSafeModeRule ratisContainerSafeModeRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(RatisContainerSafeModeRule.class); assertTrue(ratisContainerSafeModeRule.validate()); } @@ -583,9 +579,9 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { conf.setInt(HddsConfigKeys.HDDS_SCM_SAFEMODE_MIN_DATANODE, numOfDns); ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - conf, containerManager, null, null, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(conf, null, null, + containerManager, serviceManager, queue, scmContext); + scmSafeModeManager.start(); // Assert SCM is in Safe mode. assertTrue(scmSafeModeManager.getInSafeMode()); @@ -597,7 +593,8 @@ private void testSafeModeDataNodes(int numOfDns) throws Exception { queue.fireEvent(SCMEvents.NODE_REGISTRATION_CONT_REPORT, nodeRegistrationContainerReport); queue.fireEvent(SCMEvents.CONTAINER_REGISTRATION_REPORT, nodeRegistrationContainerReport); assertTrue(scmSafeModeManager.getInSafeMode()); - assertEquals(1, scmSafeModeManager.getCurrentContainerThreshold()); + assertEquals(1, SafeModeRuleFactory.getInstance() + .getSafeModeRule(RatisContainerSafeModeRule.class).getCurrentContainerThreshold()); } if (numOfDns == 0) { @@ -622,7 +619,8 @@ private void testContainerThreshold(List dnContainers, queue.fireEvent(SCMEvents.CONTAINER_REGISTRATION_REPORT, nodeRegistrationContainerReport); GenericTestUtils.waitFor(() -> { - double threshold = scmSafeModeManager.getCurrentContainerThreshold(); + double threshold = SafeModeRuleFactory.getInstance() + .getSafeModeRule(RatisContainerSafeModeRule.class).getCurrentContainerThreshold(); return threshold == expectedThreshold; }, 100, 2000 * 9); } @@ -656,7 +654,8 @@ private void testECContainerThreshold(List dnContainers, // Step2. Wait for the threshold to be reached. GenericTestUtils.waitFor(() -> { - double threshold = scmSafeModeManager.getCurrentECContainerThreshold(); + double threshold = SafeModeRuleFactory.getInstance() + .getSafeModeRule(ECContainerSafeModeRule.class).getCurrentContainerThreshold(); return threshold == expectedThreshold; }, 100, 2000 * 9); } @@ -692,9 +691,9 @@ public void testSafeModePipelineExitRule() throws Exception { ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, nodeManager, pipelineManager, + containerManager, serviceManager, queue, scmContext); + scmSafeModeManager.start(); SCMDatanodeProtocolServer.NodeRegistrationContainerReport nodeRegistrationContainerReport = HddsTestUtils.createNodeRegistrationContainerReport(containers); @@ -741,9 +740,9 @@ public void testPipelinesNotCreatedUntilPreCheckPasses() throws Exception { ContainerManager containerManager = mock(ContainerManager.class); when(containerManager.getContainers(ReplicationType.RATIS)).thenReturn(containers); - scmSafeModeManager = new SCMSafeModeManager( - config, containerManager, pipelineManager, nodeManager, queue, - serviceManager, scmContext); + scmSafeModeManager = new SCMSafeModeManager(config, nodeManager, pipelineManager, + containerManager, serviceManager, queue, scmContext); + scmSafeModeManager.start(); // Assert SCM is in Safe mode. assertTrue(scmSafeModeManager.getInSafeMode()); diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java index f6e33b352626..90af9cc8aade 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java @@ -116,14 +116,15 @@ void testScmSafeMode() throws Exception { // Ceil(0.1 * 2) is 1, as one pipeline is healthy pipeline rule is // satisfied - GenericTestUtils.waitFor(() -> - scmSafeModeManager.getHealthyPipelineSafeModeRule() + GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class) .validate(), 1000, 60000); // As Ceil(0.9 * 2) is 2, and from second pipeline no datanodes's are // reported this rule is not met yet. GenericTestUtils.waitFor(() -> - !scmSafeModeManager.getOneReplicaPipelineSafeModeRule() + !SafeModeRuleFactory.getInstance() + .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) .validate(), 1000, 60000); assertTrue(cluster.getStorageContainerManager().isInSafeMode()); @@ -133,7 +134,8 @@ void testScmSafeMode() throws Exception { cluster.restartHddsDatanode(restartedDatanode, false); GenericTestUtils.waitFor(() -> - scmSafeModeManager.getOneReplicaPipelineSafeModeRule() + SafeModeRuleFactory.getInstance() + .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) .validate(), 1000, 60000); // All safeMode preChecks are now satisfied, SCM should be out of safe mode. diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index ef097ab57995..dd0d476170a8 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig; import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider; import org.apache.hadoop.hdds.scm.safemode.HealthyPipelineSafeModeRule; +import org.apache.hadoop.hdds.scm.safemode.SafeModeRuleFactory; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; import org.apache.hadoop.hdds.scm.server.SCMConfigurator; import org.apache.hadoop.hdds.scm.server.SCMStorageConfig; @@ -646,8 +647,8 @@ protected StorageContainerManager createSCM() initializeScmStorage(scmStore); StorageContainerManager scm = HddsTestUtils.getScmSimple(conf, scmConfigurator); - HealthyPipelineSafeModeRule rule = - scm.getScmSafeModeManager().getHealthyPipelineSafeModeRule(); + HealthyPipelineSafeModeRule rule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); if (rule != null) { // Set threshold to wait for safe mode exit - this is needed since a // pipeline is marked open only after leader election. diff --git a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java index 92e136073206..fdea7b0f4fe8 100644 --- a/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java +++ b/hadoop-ozone/mini-cluster/src/main/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdds.scm.HddsTestUtils; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.safemode.HealthyPipelineSafeModeRule; +import org.apache.hadoop.hdds.scm.safemode.SafeModeRuleFactory; import org.apache.hadoop.hdds.scm.server.SCMConfigurator; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -572,8 +573,8 @@ protected SCMHAService createSCMService() } StorageContainerManager scm = HddsTestUtils.getScmSimple(scmConfig, scmConfigurator); - HealthyPipelineSafeModeRule rule = - scm.getScmSafeModeManager().getHealthyPipelineSafeModeRule(); + HealthyPipelineSafeModeRule rule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); if (rule != null) { // Set threshold to wait for safe mode exit - // this is needed since a pipeline is marked open only after From 25eead769b5ec263e80f816f6c780c236641c0f1 Mon Sep 17 00:00:00 2001 From: Nandakumar Vadivelu Date: Mon, 5 May 2025 14:01:42 +0530 Subject: [PATCH 2/4] SafeModeMetrics javadoc updated. --- .../org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java index ab1066f86908..67c203d6b485 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java @@ -26,7 +26,10 @@ /** * This class is used for maintaining SafeMode metric information, which can - * be used for monitoring during SCM startup when SCM is still in SafeMode. + * be used for monitoring during SCM startup when SCM is still in SafeMode.

+ * The metrics from this class are valid iff + * {@link org.apache.hadoop.hdds.HddsConfigKeys#HDDS_SCM_SAFEMODE_ENABLED} is + * set to true and the SCM is still in SafeMode. */ public class SafeModeMetrics { private static final String SOURCE_NAME = SafeModeMetrics.class.getSimpleName(); From 55dcd2ec27703131c06a9c01d8e29d7cf902acd4 Mon Sep 17 00:00:00 2001 From: Nandakumar Vadivelu Date: Mon, 5 May 2025 14:35:55 +0530 Subject: [PATCH 3/4] Fixed test failure. --- .../hadoop/hdds/scm/safemode/SCMSafeModeManager.java | 1 - .../hadoop/hdds/scm/safemode/SafeModeMetrics.java | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java index 811e533a72b7..a71e1ca16eed 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java @@ -100,7 +100,6 @@ public SCMSafeModeManager(final ConfigurationSource conf, factory.getPreCheckRules().forEach(rule -> preCheckRules.add(rule.getRuleName())); final boolean isSafeModeEnabled = conf.getBoolean(HDDS_SCM_SAFEMODE_ENABLED, HDDS_SCM_SAFEMODE_ENABLED_DEFAULT); - safeModeMetrics.setSafemodeEnabled(isSafeModeEnabled); if (!isSafeModeEnabled) { LOG.info("Safemode is disabled, skipping Safemode rule validation and force exiting Safemode."); exitSafeMode(true); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java index 67c203d6b485..d650c4056e6d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeMetrics.java @@ -17,7 +17,6 @@ package org.apache.hadoop.hdds.scm.safemode; -import org.apache.commons.lang3.mutable.MutableBoolean; import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -34,7 +33,6 @@ public class SafeModeMetrics { private static final String SOURCE_NAME = SafeModeMetrics.class.getSimpleName(); - private @Metric MutableBoolean safemodeEnabled; // These all values will be set to some values when safemode is enabled. private @Metric MutableGaugeLong numContainerWithOneReplicaReportedThreshold; @@ -58,14 +56,6 @@ public static SafeModeMetrics create() { return ms.register(SOURCE_NAME, "SCM Safemode Metrics", new SafeModeMetrics()); } - public void setSafemodeEnabled(boolean isSafemodeEnabled) { - this.safemodeEnabled = new MutableBoolean(isSafemodeEnabled); - } - - public MutableBoolean getSafemodeEnabled() { - return safemodeEnabled; - } - public void setNumHealthyPipelinesThreshold(long val) { this.numHealthyPipelinesThreshold.set(val); } From c39f9ec9f7e624fa659bd12a5e86a2f410bc1e57 Mon Sep 17 00:00:00 2001 From: Nandakumar Vadivelu Date: Sat, 10 May 2025 22:35:50 +0530 Subject: [PATCH 4/4] Addressed review comments. --- .../hdds/scm/safemode/SCMSafeModeManager.java | 1 - .../scm/safemode/SafeModeRuleFactory.java | 7 +++-- .../scm/safemode/TestSCMSafeModeManager.java | 31 +++++++++---------- .../TestSCMSafeModeWithPipelineRules.java | 22 +++++-------- 4 files changed, 28 insertions(+), 33 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java index a71e1ca16eed..e30b103ecf71 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java @@ -161,7 +161,6 @@ public synchronized void validateSafeModeExitRules(String ruleName) { * When all the precheck rules have been validated, set preCheckComplete to * true and then emit the safemode status so any listeners get notified of * the safemode state change. - * @param eventQueue */ private void completePreCheck() { if (validatedPreCheckRules.size() == preCheckRules.size()) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java index db7d2f9086b8..7c6385307c1a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeRuleFactory.java @@ -119,7 +119,10 @@ public List> getPreCheckRules() { } public > T getSafeModeRule(Class ruleClass) { - return safeModeRules.stream().filter(r -> ruleClass.isAssignableFrom(r.getClass())) - .map(ruleClass::cast).findFirst().orElse(null); + return safeModeRules.stream() + .filter(r -> ruleClass.isAssignableFrom(r.getClass())) + .map(ruleClass::cast) + .findFirst() + .orElse(null); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java index 9f15c37b124f..3729ca11a4dd 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeManager.java @@ -386,16 +386,16 @@ private void validateRuleStatus(String safeModeRule, String stringToMatch) { } private void checkHealthy(int expectedCount) throws Exception { - GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() - .getSafeModeRule(HealthyPipelineSafeModeRule.class) - .getCurrentHealthyPipelineCount() == expectedCount, + final HealthyPipelineSafeModeRule pipelineRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); + GenericTestUtils.waitFor(() -> pipelineRule.getCurrentHealthyPipelineCount() == expectedCount, 100, 5000); } private void checkOpen(int expectedCount) throws Exception { - GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() - .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) - .getCurrentReportedPipelineCount() == expectedCount, + final OneReplicaPipelineSafeModeRule pipelineRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(OneReplicaPipelineSafeModeRule.class); + GenericTestUtils.waitFor(() -> pipelineRule.getCurrentReportedPipelineCount() == expectedCount, 1000, 5000); } @@ -618,11 +618,11 @@ private void testContainerThreshold(List dnContainers, nodeRegistrationContainerReport); queue.fireEvent(SCMEvents.CONTAINER_REGISTRATION_REPORT, nodeRegistrationContainerReport); - GenericTestUtils.waitFor(() -> { - double threshold = SafeModeRuleFactory.getInstance() - .getSafeModeRule(RatisContainerSafeModeRule.class).getCurrentContainerThreshold(); - return threshold == expectedThreshold; - }, 100, 2000 * 9); + final RatisContainerSafeModeRule containerRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(RatisContainerSafeModeRule.class); + GenericTestUtils.waitFor(() -> + containerRule.getCurrentContainerThreshold() == expectedThreshold, + 100, 2000 * 9); } /** @@ -653,11 +653,10 @@ private void testECContainerThreshold(List dnContainers, } // Step2. Wait for the threshold to be reached. - GenericTestUtils.waitFor(() -> { - double threshold = SafeModeRuleFactory.getInstance() - .getSafeModeRule(ECContainerSafeModeRule.class).getCurrentContainerThreshold(); - return threshold == expectedThreshold; - }, 100, 2000 * 9); + ECContainerSafeModeRule containerRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(ECContainerSafeModeRule.class); + GenericTestUtils.waitFor(() -> containerRule.getCurrentContainerThreshold() == expectedThreshold, + 100, 2000 * 9); } @Test diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java index 90af9cc8aade..134f7f6ea810 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/safemode/TestSCMSafeModeWithPipelineRules.java @@ -112,20 +112,18 @@ void testScmSafeMode() throws Exception { SCMSafeModeManager scmSafeModeManager = cluster.getStorageContainerManager().getScmSafeModeManager(); - // Ceil(0.1 * 2) is 1, as one pipeline is healthy pipeline rule is // satisfied - GenericTestUtils.waitFor(() -> SafeModeRuleFactory.getInstance() - .getSafeModeRule(HealthyPipelineSafeModeRule.class) - .validate(), 1000, 60000); + final HealthyPipelineSafeModeRule healthyPipelineRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(HealthyPipelineSafeModeRule.class); + GenericTestUtils.waitFor(healthyPipelineRule::validate, 1000, 60000); // As Ceil(0.9 * 2) is 2, and from second pipeline no datanodes's are // reported this rule is not met yet. - GenericTestUtils.waitFor(() -> - !SafeModeRuleFactory.getInstance() - .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) - .validate(), 1000, 60000); + final OneReplicaPipelineSafeModeRule oneReplicaPipelineRule = SafeModeRuleFactory.getInstance() + .getSafeModeRule(OneReplicaPipelineSafeModeRule.class); + GenericTestUtils.waitFor(() -> !oneReplicaPipelineRule.validate(), 1000, 60000); assertTrue(cluster.getStorageContainerManager().isInSafeMode()); @@ -133,10 +131,7 @@ void testScmSafeMode() throws Exception { // Now restart one datanode from the 2nd pipeline cluster.restartHddsDatanode(restartedDatanode, false); - GenericTestUtils.waitFor(() -> - SafeModeRuleFactory.getInstance() - .getSafeModeRule(OneReplicaPipelineSafeModeRule.class) - .validate(), 1000, 60000); + GenericTestUtils.waitFor(oneReplicaPipelineRule::validate, 1000, 60000); // All safeMode preChecks are now satisfied, SCM should be out of safe mode. @@ -159,8 +154,7 @@ void testScmSafeMode() throws Exception { ReplicationManager replicationManager = cluster.getStorageContainerManager().getReplicationManager(); - GenericTestUtils.waitFor(() -> - replicationManager.isRunning(), 1000, 60000); + GenericTestUtils.waitFor(replicationManager::isRunning, 1000, 60000); } @AfterEach