From 87ba67a0d18e96c4689f88393df8808dc21b9dae Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Tue, 25 Feb 2020 12:36:31 -0800 Subject: [PATCH 1/4] HDDS-3072. SCM scrub pipeline should be started after coming out of safe mode. --- .../pipeline/BackgroundPipelineCreator.java | 10 +++++---- .../hdds/scm/pipeline/PipelineManager.java | 13 +++++++++++ .../hdds/scm/pipeline/SCMPipelineManager.java | 18 ++++++++++++++- .../hdds/scm/safemode/SCMSafeModeManager.java | 1 - .../hdds/scm/safemode/SafeModeHandler.java | 22 ++++--------------- .../scm/server/StorageContainerManager.java | 3 +++ .../ozone/om/TestOzoneManagerRestart.java | 2 +- 7 files changed, 44 insertions(+), 25 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java index 8e4ec6a00f8f..b8f0fb623fdf 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/BackgroundPipelineCreator.java @@ -111,10 +111,12 @@ private void createPipelines() { continue; } - try { - pipelineManager.scrubPipeline(type, factor); - } catch (IOException e) { - LOG.error("Error while scrubbing pipelines {}", e); + if (!pipelineManager.getSafeModeStatus()) { + try { + pipelineManager.scrubPipeline(type, factor); + } catch (IOException e) { + LOG.error("Error while scrubbing pipelines {}", e); + } } while (true) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java index 635e032c4764..68d66ff10d0a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineManager.java @@ -112,4 +112,17 @@ void scrubPipeline(ReplicationType type, ReplicationFactor factor) default void waitPipelineReady(PipelineID pipelineID, long timeout) throws IOException { } + + /** + * Set SafeMode status. + * + * @param safeModeStatus + */ + void setSafeModeStatus(boolean safeModeStatus); + + /** + * Get SafeMode status. + * @return boolean + */ + boolean getSafeModeStatus(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index eb2e0d6d0a22..d0537b755234 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -54,6 +54,7 @@ import java.util.Set; import java.util.Collection; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.stream.Collectors; @@ -85,6 +86,8 @@ public class SCMPipelineManager implements PipelineManager { // Pipeline Manager MXBean private ObjectName pmInfoBean; + private final AtomicBoolean isInSafeMode; + public SCMPipelineManager(Configuration conf, NodeManager nodeManager, EventPublisher eventPublisher) throws IOException { @@ -127,6 +130,9 @@ protected SCMPipelineManager(Configuration conf, NodeManager nodeManager, HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL, HddsConfigKeys.HDDS_PIPELINE_REPORT_INTERVAL_DEFAULT, TimeUnit.MILLISECONDS); + this.isInSafeMode = new AtomicBoolean(conf.getBoolean( + HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED, + HddsConfigKeys.HDDS_SCM_SAFEMODE_ENABLED_DEFAULT)); } public PipelineStateManager getStateManager() { @@ -414,7 +420,7 @@ public void scrubPipeline(ReplicationType type, ReplicationFactor factor) .toEpochMilli() >= pipelineScrubTimeoutInMills) .collect(Collectors.toList()); for (Pipeline p : needToSrubPipelines) { - LOG.info("srubbing pipeline: id: " + p.getId().toString() + + LOG.info("Scrubbing pipeline: id: " + p.getId().toString() + " since it stays at ALLOCATED stage for " + Duration.between(currentTime, p.getCreationTimestamp()).toMinutes() + " mins."); @@ -618,4 +624,14 @@ protected MetadataStore getPipelineStore() { protected NodeManager getNodeManager() { return nodeManager; } + + @Override + public void setSafeModeStatus(boolean safeModeStatus) { + this.isInSafeMode.set(safeModeStatus); + } + + public boolean getSafeModeStatus() { + return this.isInSafeMode.get(); + } + } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java index 90fce3f63042..ca484c834641 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SCMSafeModeManager.java @@ -134,7 +134,6 @@ public SCMSafeModeManager(Configuration conf, exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, oneReplicaPipelineSafeModeRule); } - emitSafeModeStatus(); boolean createPipelineInSafemode = conf.getBoolean( HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION, HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_CREATION_DEFAULT); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java index 2fbe89361412..58ee50c60c1f 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java @@ -94,8 +94,8 @@ public SafeModeHandler(Configuration configuration, * Set SafeMode status based on * {@link org.apache.hadoop.hdds.scm.events.SCMEvents#SAFE_MODE_STATUS}. * - * Inform BlockManager, ScmClientProtocolServer and replicationAcitivity - * status about safeMode status. + * Inform BlockManager, ScmClientProtocolServer, ScmPipeline Manager and + * Replication Manager status about safeMode status. * * @param safeModeStatus * @param publisher @@ -106,6 +106,7 @@ public void onMessage(SafeModeStatus safeModeStatus, isInSafeMode.set(safeModeStatus.getSafeModeStatus()); scmClientProtocolServer.setSafeModeStatus(isInSafeMode.get()); scmBlockManager.setSafeModeStatus(isInSafeMode.get()); + scmPipelineManager.setSafeModeStatus(isInSafeMode.get()); if (!isInSafeMode.get()) { final Thread safeModeExitThread = new Thread(() -> { @@ -115,7 +116,7 @@ public void onMessage(SafeModeStatus safeModeStatus, Thread.currentThread().interrupt(); } replicationManager.start(); - cleanupPipelines(); + scmPipelineManager.triggerPipelineCreation(); }); safeModeExitThread.setDaemon(true); @@ -124,21 +125,6 @@ public void onMessage(SafeModeStatus safeModeStatus, } - private void cleanupPipelines() { - List pipelineList = scmPipelineManager.getPipelines(); - pipelineList.forEach((pipeline) -> { - try { - if (pipeline.getPipelineState() == Pipeline.PipelineState.ALLOCATED && - pipeline.isAllocationTimeout()) { - scmPipelineManager.finalizeAndDestroyPipeline(pipeline, false); - } - } catch (IOException ex) { - LOG.error("Finalize and destroy pipeline failed for pipeline " - + pipeline.toString(), ex); - } - }); - } - public boolean getSafeModeStatus() { return isInSafeMode.get(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 6063a6248da5..133429338a2a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -358,6 +358,9 @@ public StorageContainerManager(OzoneConfiguration conf, eventQueue.addHandler(SCMEvents.PIPELINE_ACTIONS, pipelineActionHandler); eventQueue.addHandler(SCMEvents.PIPELINE_REPORT, pipelineReportHandler); eventQueue.addHandler(SCMEvents.SAFE_MODE_STATUS, safeModeHandler); + + // Emit initial safe mode status, as now handlers are registered. + scmSafeModeManager.emitSafeModeStatus(); registerMXBean(); registerMetricsSource(this); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java index 6058fad61d2b..77698cf6c985 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java @@ -81,7 +81,7 @@ public void init() throws Exception { conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD); - conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 1); cluster = MiniOzoneCluster.newBuilder(conf) .setClusterId(clusterId) .setScmId(scmId) From b8d5fee8e889bb0b0848ac9231e3927054eecc7a Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Tue, 25 Feb 2020 12:44:45 -0800 Subject: [PATCH 2/4] remove test change --- .../org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java | 1 + .../org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java index d0537b755234..0989d34fc21d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/SCMPipelineManager.java @@ -630,6 +630,7 @@ public void setSafeModeStatus(boolean safeModeStatus) { this.isInSafeMode.set(safeModeStatus); } + @Override public boolean getSafeModeStatus() { return this.isInSafeMode.get(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java index 77698cf6c985..6058fad61d2b 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerRestart.java @@ -81,7 +81,7 @@ public void init() throws Exception { conf.setBoolean(OZONE_ACL_ENABLED, true); conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); conf.set(OZONE_ADMINISTRATORS, OZONE_ADMINISTRATORS_WILDCARD); - conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 1); + conf.setInt(OZONE_SCM_RATIS_PIPELINE_LIMIT, 10); cluster = MiniOzoneCluster.newBuilder(conf) .setClusterId(clusterId) .setScmId(scmId) From 486a3cb4d7c3553ee60d2f4ba0311d20e69da3c0 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Tue, 25 Feb 2020 16:28:54 -0800 Subject: [PATCH 3/4] check style. --- .../org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java index 58ee50c60c1f..ff5db1b525e3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.scm.block.BlockManager; import org.apache.hadoop.hdds.scm.container.ReplicationManager; -import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; import org.apache.hadoop.hdds.scm.server.SCMClientProtocolServer; import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus; @@ -30,8 +29,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; -import java.util.List; import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; From a247b0ecf6a5fcf220795c7bbeb24cae0fa1bd7c Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Wed, 4 Mar 2020 11:29:06 -0800 Subject: [PATCH 4/4] fix sammi comment --- .../org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java index ff5db1b525e3..50095abbf979 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/safemode/SafeModeHandler.java @@ -103,7 +103,6 @@ public void onMessage(SafeModeStatus safeModeStatus, isInSafeMode.set(safeModeStatus.getSafeModeStatus()); scmClientProtocolServer.setSafeModeStatus(isInSafeMode.get()); scmBlockManager.setSafeModeStatus(isInSafeMode.get()); - scmPipelineManager.setSafeModeStatus(isInSafeMode.get()); if (!isInSafeMode.get()) { final Thread safeModeExitThread = new Thread(() -> { @@ -112,6 +111,7 @@ public void onMessage(SafeModeStatus safeModeStatus, } catch (InterruptedException e) { Thread.currentThread().interrupt(); } + scmPipelineManager.setSafeModeStatus(isInSafeMode.get()); replicationManager.start(); scmPipelineManager.triggerPipelineCreation(); });