diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 32ff45b2bfd9b..36ea5c2f646cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -2009,6 +2009,9 @@ synchronized void transitionToStandby() throws IOException { synchronized void transitionToObserver() throws IOException { String operationName = "transitionToObserver"; namesystem.checkSuperuserPrivilege(operationName); + if (notBecomeActiveInSafemode && isInSafeMode()) { + throw new ServiceFailedException(getRole() + " still not leave safemode"); + } if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java index 4d0d56c05a75c..a462cafb458c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -247,7 +247,7 @@ private boolean checkSupportObserver(HAServiceTarget target) { } private int transitionToObserver(final CommandLine cmd) - throws IOException, ServiceFailedException { + throws IOException { String[] argv = cmd.getArgs(); if (argv.length != 1) { errOut.println("transitionToObserver: incorrect number of arguments"); @@ -262,8 +262,13 @@ private int transitionToObserver(final CommandLine cmd) if (!checkManualStateManagementOK(target)) { return -1; } - HAServiceProtocol proto = target.getProxy(getConf(), 0); - HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo()); + try { + HAServiceProtocol proto 
= target.getProxy(getConf(), 0); + HAServiceProtocolHelper.transitionToObserver(proto, createReqInfo()); + } catch (ServiceFailedException e) { + errOut.println("transitionToObserver failed! " + e.getLocalizedMessage()); + return -1; + } return 0; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 7b65f1d9c2765..e4cb5b9ffe16d 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -3725,7 +3725,7 @@ dfs.ha.nn.not-become-active-in-safemode false - This will prevent safe mode namenodes to become active while other standby + This will prevent safe mode namenodes from becoming active or observer while other standby namenodes might be ready to serve requests when it is set to true. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md index df1ab68afca5b..aec1534b03b5b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithNFS.md @@ -316,12 +316,14 @@ The order in which you set these configurations is unimportant, but the values y hdfs://mycluster -* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active +* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active or observer Whether allow namenode to become active when it is in safemode, when it is set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if auto failover is on, or will throw exception to fail the transition to - active if auto failover is off. For example: + active if auto failover is off. 
Similarly, when this configuration is set + to true, a namenode that is in safemode will throw an exception + to fail the transition to observer. For example: dfs.ha.nn.not-become-active-in-safemode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md index 6bdd4e1c52927..5591f4f22453b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/HDFSHighAvailabilityWithQJM.md @@ -376,12 +376,14 @@ The order in which you set these configurations is unimportant, but the values y /path/to/journal/node/local/data -* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active +* **dfs.ha.nn.not-become-active-in-safemode** - if prevent safe mode namenodes to become active or observer Whether allow namenode to become active when it is in safemode, when it is set to true, namenode in safemode will report SERVICE_UNHEALTHY to ZKFC if auto failover is on, or will throw exception to fail the transition to - active if auto failover is off. For example: + active if auto failover is off. Similarly, when this configuration is set + to true, a namenode that is in safemode will throw an exception + to fail the transition to observer. 
For example: dfs.ha.nn.not-become-active-in-safemode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 16a57c6867242..4766c4cecc9b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -977,4 +977,26 @@ public void testTransitionToActiveWhenSafeMode() throws Exception { () -> miniCluster.transitionToActive(0)); } } + + @Test + public void testTransitionToObserverWhenSafeMode() throws Exception { + Configuration config = new Configuration(); + config.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true); + try (MiniDFSCluster miniCluster = new MiniDFSCluster.Builder(config, + new File(GenericTestUtils.getRandomizedTempPath())) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build()) { + miniCluster.waitActive(); + miniCluster.transitionToStandby(0); + miniCluster.transitionToStandby(1); + NameNode namenode0 = miniCluster.getNameNode(0); + NameNode namenode1 = miniCluster.getNameNode(1); + NameNodeAdapter.enterSafeMode(namenode0, false); + NameNodeAdapter.enterSafeMode(namenode1, false); + LambdaTestUtils.intercept(ServiceFailedException.class, + "NameNode still not leave safemode", + () -> miniCluster.transitionToObserver(0)); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java index aa048f865c2de..d0edd175ec1ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hdfs.tools; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; @@ -70,6 +72,7 @@ public class TestDFSHAAdminMiniCluster { @Before public void setup() throws IOException { conf = new Configuration(); + conf.setBoolean(DFS_HA_NN_NOT_BECOME_ACTIVE_IN_SAFEMODE, true); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) .build(); @@ -161,7 +164,28 @@ public void testObserverIllegalTransition() throws Exception { assertEquals(-1, runTool("-transitionToActive", "nn1")); assertFalse(nnode1.isActiveState()); } - + + /** + * Tests that transitionToObserver fails in safe mode and succeeds after leaving safe mode. + */ + @Test + public void testObserverTransitionInSafeMode() throws Exception { + NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false); + DFSHAAdmin admin = new DFSHAAdmin(); + admin.setConf(conf); + System.setIn(new ByteArrayInputStream("yes\n".getBytes())); + int result = admin.run( + new String[]{"-transitionToObserver", "-forcemanual", "nn1"}); + assertEquals("State transition returned: " + result, -1, result); + + NameNodeAdapter.leaveSafeMode(cluster.getNameNode(0)); + System.setIn(new ByteArrayInputStream("yes\n".getBytes())); + int result1 = admin.run( + new String[]{"-transitionToObserver", "-forcemanual", "nn1"}); + assertEquals("State transition returned: " + result1, 0, result1); + assertFalse(cluster.getNameNode(0).isInSafeMode()); + } + @Test public void testTryFailoverToSafeMode() throws Exception { conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,