diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/ServerNotLeaderException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/ServerNotLeaderException.java
new file mode 100644
index 000000000000..71b26f587882
--- /dev/null
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/ratis/ServerNotLeaderException.java
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hdds.ratis;
+
+import org.apache.hadoop.hdds.HddsUtils;
+import org.apache.ratis.protocol.RaftPeerId;
+import org.apache.ratis.protocol.exceptions.NotLeaderException;
+
+import java.io.IOException;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Exception thrown when a server is not the leader of its Ratis group.
+ */
+public class ServerNotLeaderException extends IOException {
+  private final String currentPeerId;
+  private final String leader;
+  private static final Pattern CURRENT_PEER_ID_PATTERN =
+      Pattern.compile("Server:(.*) is not the leader[.]+.*", Pattern.DOTALL);
+  private static final Pattern SUGGESTED_LEADER_PATTERN =
+      Pattern.compile(".*Suggested leader is Server:([^.]*).*", Pattern.DOTALL);
+
+  public ServerNotLeaderException(RaftPeerId currentPeerId) {
+    super("Server:" + currentPeerId + " is not the leader. Could not " +
+        "determine the leader node.");
+    this.currentPeerId = currentPeerId.toString();
+    this.leader = null;
+  }
+
+  public ServerNotLeaderException(RaftPeerId currentPeerId,
+      String suggestedLeader) {
+    super("Server:" + currentPeerId + " is not the leader. Suggested leader is"
+        + " Server:" + suggestedLeader + ".");
+    this.currentPeerId = currentPeerId.toString();
+    this.leader = suggestedLeader;
+  }
+
+  public ServerNotLeaderException(String message) {
+    super(message);
+
+    Matcher currentLeaderMatcher = CURRENT_PEER_ID_PATTERN.matcher(message);
+    if (currentLeaderMatcher.matches()) {
+      this.currentPeerId = currentLeaderMatcher.group(1);
+
+      Matcher suggestedLeaderMatcher =
+          SUGGESTED_LEADER_PATTERN.matcher(message);
+      if (suggestedLeaderMatcher.matches()) {
+        this.leader = suggestedLeaderMatcher.group(1);
+      } else {
+        this.leader = null;
+      }
+    } else {
+      this.currentPeerId = null;
+      this.leader = null;
+    }
+  }
+
+  public String getSuggestedLeader() {
+    return leader;
+  }
+
+  /**
+   * Convert {@link org.apache.ratis.protocol.exceptions.NotLeaderException}
+   * to {@link ServerNotLeaderException}.
+   * @param notLeaderException the Ratis exception to convert
+   * @param currentPeer the peer that rejected the request
+   * @param port the RPC port on which clients can reach the suggested leader
+   * @return ServerNotLeaderException
+   */
+  public static ServerNotLeaderException convertToNotLeaderException(
+      NotLeaderException notLeaderException,
+      RaftPeerId currentPeer, String port) {
+    String suggestedLeader = notLeaderException.getSuggestedLeader() != null ?
+        HddsUtils
+            .getHostName(notLeaderException.getSuggestedLeader().getAddress())
+            .get() :
+        null;
+    ServerNotLeaderException serverNotLeaderException;
+    if (suggestedLeader != null) {
+      String suggestedLeaderHostPort = suggestedLeader + ":" + port;
+      serverNotLeaderException =
+          new ServerNotLeaderException(currentPeer, suggestedLeaderHostPort);
+    } else {
+      serverNotLeaderException = new ServerNotLeaderException(currentPeer);
+    }
+    return serverNotLeaderException;
+  }
+}
\ No newline at end of file
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/NonRetriableException.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/NonRetriableException.java
new file mode 100644
index 000000000000..52da99e67488
--- /dev/null
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/NonRetriableException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdds.scm.ha;
+
+import java.io.IOException;
+
+/**
+ * Exception for which there should be no retry.
+ */
+public class NonRetriableException extends IOException {
+
+  public NonRetriableException(IOException exception) {
+    super(exception);
+  }
+}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
index 0db2b5a54092..fb6dc96a5def 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
@@ -24,15 +24,18 @@ import org.apache.hadoop.hdds.conf.ConfigurationException;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.server.ServerUtils;
 import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.ozone.ha.ConfUtils;
 import org.apache.ratis.protocol.exceptions.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.io.IOException;
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -198,12 +201,40 @@ public static Collection<String> getSCMNodeIds(
     return getSCMNodeIds(configuration, scmServiceId);
   }
 
+  private static Throwable unwrapException(Exception e) {
+    IOException ioException = null;
+    Throwable cause = e.getCause();
+    if (cause instanceof RemoteException) {
+      ioException = ((RemoteException) cause).unwrapRemoteException();
+    }
+    return ioException == null ?
+        e : ioException;
+  }
+
+  /**
+   * Checks if the underlying exception is of type StateMachineException.
+   * Used by scm clients.
+   */
+  public static boolean isNonRetriableException(Exception e) {
+    Throwable t =
+        getExceptionForClass(e, StateMachineException.class);
+    return t != null;
+  }
+
+  /**
+   * Checks if the underlying exception is of type NonRetriableException.
+   * Used by scm clients.
+   */
+  public static boolean checkNonRetriableException(Exception e) {
+    Throwable t = unwrapException(e);
+    return NonRetriableException.class.isInstance(t);
+  }
+
   // This will return the underlying exception after unwrapping
   // the exception to see if it matches with expected exception
   // list , returns true otherwise will return false.
   public static boolean isRetriableWithNoFailoverException(Exception e) {
     Throwable t = e;
-    while (t != null && t.getCause() != null) {
+    while (t != null) {
       for (Class<? extends Exception> clazz :
           getRetriableWithNoFailoverExceptionList()) {
         if (clazz.isInstance(t)) {
@@ -215,6 +246,41 @@ public static boolean isRetriableWithNoFailoverException(Exception e) {
     return false;
   }
 
+  /**
+   * Checks if the underlying exception is of type
+   * RetriableWithNoFailoverException. Used by scm clients.
+   */
+  public static boolean checkRetriableWithNoFailoverException(Exception e) {
+    Throwable t = unwrapException(e);
+    return RetriableWithNoFailoverException.class.isInstance(t);
+  }
+
+  public static Throwable getNotLeaderException(Exception e) {
+    return getExceptionForClass(e, NotLeaderException.class);
+  }
+
+  public static Throwable getServerNotLeaderException(Exception e) {
+    return getExceptionForClass(e, ServerNotLeaderException.class);
+  }
+
+  // Returns the underlying exception of the given class after unwrapping
+  // any RemoteException, or null if no cause in the chain matches.
+  public static Throwable getExceptionForClass(Exception e,
+      Class<? extends Exception> clazz) {
+    IOException ioException = null;
+    Throwable cause = e.getCause();
+    if (cause instanceof RemoteException) {
+      ioException = ((RemoteException) cause).unwrapRemoteException();
+    }
+    Throwable t = ioException == null ? e : ioException;
+    while (t != null) {
+      if (clazz.isInstance(t)) {
+        return t;
+      }
+      t = t.getCause();
+    }
+    return null;
+  }
+
   public static List<Class<? extends
       Exception>> getRetriableWithNoFailoverExceptionList() {
     return RETRIABLE_WITH_NO_FAILOVER_EXCEPTION_LIST;
@@ -222,13 +288,15 @@ Exception>> getRetriableWithNoFailoverExceptionList() {
 
   public static RetryPolicy.RetryAction getRetryAction(int failovers,
       int retry, Exception e, int maxRetryCount, long retryInterval) {
-    if (SCMHAUtils.isRetriableWithNoFailoverException(e)) {
+    if (SCMHAUtils.checkRetriableWithNoFailoverException(e)) {
       if (retry < maxRetryCount) {
         return new RetryPolicy.RetryAction(
             RetryPolicy.RetryAction.RetryDecision.RETRY, retryInterval);
       } else {
         return RetryPolicy.RetryAction.FAIL;
       }
+    } else if (SCMHAUtils.checkNonRetriableException(e)) {
+      return RetryPolicy.RetryAction.FAIL;
     } else {
       if (failovers < maxRetryCount) {
         return new RetryPolicy.RetryAction(
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index ac8169e35c50..0524b285b949 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -2764,6 +2764,14 @@
+  <property>
+    <name>ozone.client.key.provider.cache.expiry</name>
+    <tag>OZONE, CLIENT, SECURITY</tag>
+    <value>10d</value>
+    <description>Ozone client security key provider cache expiration time.
+    </description>
+  </property>
+
   <property>
     <name>ozone.scm.info.wait.duration</name>
     <tag>OZONE, SCM, OM</tag>
@@ -2773,12 +2781,4 @@
-  <property>
-    <name>ozone.client.key.provider.cache.expiry</name>
-    <tag>OZONE, CLIENT, SECURITY</tag>
-    <value>10d</value>
-    <description>Ozone client security key provider cache expiration time.
-    </description>
-  </property>
-
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java
index d93f7f63057b..412b2ec88c00 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/ScmBlockLocationProtocolClientSideTranslatorPB.java
@@ -29,7 +29,6 @@
 import org.apache.hadoop.hdds.client.StandaloneReplicationConfig;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos;
 import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationRequest;
 import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.SCMBlockLocationResponse;
 import org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos.Type;
@@ -119,11 +118,6 @@ private SCMBlockLocationResponse submitRequest(
     try {
       SCMBlockLocationResponse response =
           rpcProxy.send(NULL_RPC_CONTROLLER, req);
-      if (response.getStatus() ==
-          ScmBlockLocationProtocolProtos.Status.SCM_NOT_LEADER) {
-        failoverProxyProvider
-            .performFailoverToAssignedLeader(response.getLeaderSCMNodeId());
-      }
       return response;
     } catch (ServiceException e) {
       throw ProtobufHelper.getRemoteException(e);
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMBlockLocationFailoverProxyProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMBlockLocationFailoverProxyProvider.java
index 4253d040877d..b0188facea26 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMBlockLocationFailoverProxyProvider.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMBlockLocationFailoverProxyProvider.java
@@ -22,6 +22,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.conf.ConfigurationException;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
 import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
 import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo;
 import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolPB;
@@ -29,7 +30,6 @@
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryPolicy;
-import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
 import org.apache.hadoop.ipc.ProtobufRpcEngine;
 import org.apache.hadoop.ipc.RPC;
 import org.apache.hadoop.net.NetUtils;
@@ -103,7 +103,7 @@ public SCMBlockLocationFailoverProxyProvider(ConfigurationSource conf) {
     this.retryInterval = config.getRetryInterval();
   }
 
-  private void loadConfigs() {
+  private synchronized void loadConfigs() {
     scmNodeIds = new ArrayList<>();
     List<SCMNodeInfo> scmNodeInfoList = SCMNodeInfo.buildNodeInfo(conf);
 
@@ -131,7 +131,14 @@ private void loadConfigs() {
   }
 
   @VisibleForTesting
-  public synchronized String getCurrentProxyOMNodeId() {
+  // Points the provider at the proxy after the given node, so the next
+  // request deliberately lands on a different (possibly non-leader) SCM.
+  public synchronized void changeCurrentProxy(String nodeId) {
+
currentProxyIndex = scmNodeIds.indexOf(nodeId); + currentProxySCMNodeId = nodeId; + nextProxyIndex(); + } + + @VisibleForTesting + public synchronized String getCurrentProxySCMNodeId() { return currentProxySCMNodeId; } @@ -143,15 +150,28 @@ public synchronized ProxyInfo getProxy() { } @Override - public void performFailover(ScmBlockLocationProtocolPB newLeader) { + public synchronized void performFailover( + ScmBlockLocationProtocolPB newLeader) { // Should do nothing here. - LOG.debug("Failing over to next proxy. {}", getCurrentProxyOMNodeId()); + LOG.debug("Failing over to next proxy. {}", getCurrentProxySCMNodeId()); } - public void performFailoverToAssignedLeader(String newLeader) { + public synchronized void performFailoverToAssignedLeader(String newLeader, + Exception e) { + ServerNotLeaderException snle = + (ServerNotLeaderException) SCMHAUtils.getServerNotLeaderException(e); + if (snle != null && snle.getSuggestedLeader() != null) { + newLeader = scmProxyInfoMap.values().stream().filter( + proxyInfo -> NetUtils.getHostPortString(proxyInfo.getAddress()) + .equals(snle.getSuggestedLeader())).findFirst().get().getNodeId(); + LOG.debug("Performing failover to suggested leader {}, nodeId {}", + snle.getSuggestedLeader(), newLeader); + } if (newLeader == null) { // If newLeader is not assigned, it will fail over to next proxy. nextProxyIndex(); + LOG.debug("Performing failover to next proxy node {}", + currentProxySCMNodeId); } else { if (!assignLeaderToNode(newLeader)) { LOG.debug("Failing over SCM proxy to nodeId: {}", newLeader); @@ -249,8 +269,8 @@ public RetryPolicy getSCMBlockLocationRetryPolicy(String newLeader) { @Override public RetryAction shouldRetry(Exception e, int retry, int failover, boolean b) { - if (!SCMHAUtils.isRetriableWithNoFailoverException(e)) { - performFailoverToAssignedLeader(newLeader); + if (!SCMHAUtils.checkRetriableWithNoFailoverException(e)) { + performFailoverToAssignedLeader(newLeader, e); } return SCMHAUtils.getRetryAction(failover, retry, e, maxRetryCount, getRetryInterval()); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java index 65acfaea3a1c..df48b233f622 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java @@ -37,6 +37,8 @@ public class SCMClientConfig { public static final String SCM_CLIENT_RPC_TIME_OUT = "rpc.timeout"; public static final String SCM_CLIENT_FAILOVER_MAX_RETRY = "failover.max.retry"; + public static final String SCM_CLIENT_MAX_RETRY_TIMEOUT = + "max.retry.timeout"; public static final String SCM_CLIENT_RETRY_INTERVAL = "failover.retry.interval"; @@ -54,6 +56,16 @@ public class SCMClientConfig { ) private long rpcTimeOut = 15 * 60 * 1000; + @Config(key = SCM_CLIENT_MAX_RETRY_TIMEOUT, + defaultValue = "10m", + type = ConfigType.TIME, + timeUnit = TimeUnit.MILLISECONDS, + tags = {OZONE, SCM, CLIENT}, + description = "Max retry timeout for SCM Client" + ) + + private long maxRetryTimeout = 10 * 60 * 1000; + @Config(key = SCM_CLIENT_FAILOVER_MAX_RETRY, defaultValue = "15", type = ConfigType.INT, @@ -86,7 +98,16 @@ public void setRpcTimeOut(long timeOut) { } public int getRetryCount() { - return retryCount; + long duration = getMaxRetryTimeout(); + int retryCountFromMaxTimeOut = (int) (duration / getRetryInterval()); + // If duration is set to lesser 
value, fall back to actual default + // retry count. + return retryCountFromMaxTimeOut > retryCount ? + retryCountFromMaxTimeOut : retryCount; + } + + public long getMaxRetryTimeout() { + return maxRetryTimeout; } public void setRetryCount(int retryCount) { @@ -100,4 +121,8 @@ public long getRetryInterval() { public void setRetryInterval(long retryInterval) { this.retryInterval = retryInterval; } + + public void setMaxRetryTimeout(long timeout) { + this.maxRetryTimeout = timeout; + } } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMContainerLocationFailoverProxyProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMContainerLocationFailoverProxyProvider.java index 4c919ae4839a..96e627555535 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMContainerLocationFailoverProxyProvider.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMContainerLocationFailoverProxyProvider.java @@ -22,6 +22,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.conf.ConfigurationException; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; @@ -99,7 +100,7 @@ public SCMContainerLocationFailoverProxyProvider(ConfigurationSource conf) { } @VisibleForTesting - protected void loadConfigs() { + protected synchronized void loadConfigs() { List scmNodeInfoList = SCMNodeInfo.buildNodeInfo(conf); scmNodeIds = new ArrayList<>(); @@ -131,6 +132,13 @@ public synchronized String getCurrentProxySCMNodeId() { return currentProxySCMNodeId; } + @VisibleForTesting + public synchronized void changeCurrentProxy(String nodeId) { + currentProxyIndex = scmNodeIds.indexOf(nodeId); + currentProxySCMNodeId = nodeId; + nextProxyIndex(); + } + @Override public synchronized ProxyInfo getProxy() { ProxyInfo currentProxyInfo @@ -148,19 +156,33 @@ public synchronized List getProxies() { } @Override - public synchronized void performFailover( + public void performFailover( StorageContainerLocationProtocolPB newLeader) { // Should do nothing here. LOG.debug("Failing over to next proxy. {}", getCurrentProxySCMNodeId()); } - public synchronized void performFailoverToAssignedLeader(String newLeader) { + public synchronized void performFailoverToAssignedLeader(String newLeader, + Exception e) { + ServerNotLeaderException snle = + (ServerNotLeaderException) SCMHAUtils.getServerNotLeaderException(e); + if (snle != null && snle.getSuggestedLeader() != null) { + newLeader = scmProxyInfoMap.values().stream().filter( + proxyInfo -> NetUtils.getHostPortString(proxyInfo.getAddress()) + .equals(snle.getSuggestedLeader())).findFirst().get().getNodeId(); + LOG.debug("Performing failover to suggested leader {}, nodeId {}", + snle.getSuggestedLeader(), newLeader); + } if (newLeader == null) { - // If newLeader is not assigned, fail over to next proxy. - nextProxyIndex(); - } else if (!assignLeaderToNode(newLeader)) { - // If failed to fail over to newLeader, fail over to next proxy. + // If newLeader is not assigned, it will fail over to next proxy. 
nextProxyIndex(); + LOG.debug("Performing failover to next proxy node {}", + currentProxySCMNodeId); + } else { + if (!assignLeaderToNode(newLeader)) { + LOG.debug("Failing over SCM proxy to nodeId: {}", newLeader); + nextProxyIndex(); + } } } @@ -181,37 +203,19 @@ public synchronized void close() throws IOException { } } - public RetryPolicy.RetryAction getRetryAction(int failovers, int retry, - Exception e) { - if (SCMHAUtils.isRetriableWithNoFailoverException(e)) { - if (retry < maxRetryCount) { - return new RetryPolicy.RetryAction( - RetryPolicy.RetryAction.RetryDecision.RETRY, getRetryInterval()); - } else { - return RetryPolicy.RetryAction.FAIL; - } - } else if (failovers < maxRetryCount) { - return new RetryPolicy.RetryAction( - RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY, - getRetryInterval()); - } else { - return RetryPolicy.RetryAction.FAIL; - } - } - private long getRetryInterval() { // TODO add exponential backup return retryInterval; } - private int nextProxyIndex() { + private synchronized int nextProxyIndex() { // round robin the next proxy currentProxyIndex = (currentProxyIndex + 1) % scmProxies.size(); currentProxySCMNodeId = scmNodeIds.get(currentProxyIndex); return currentProxyIndex; } - private boolean assignLeaderToNode(String newLeaderNodeId) { + private synchronized boolean assignLeaderToNode(String newLeaderNodeId) { if (!currentProxySCMNodeId.equals(newLeaderNodeId) && scmProxies.containsKey(newLeaderNodeId)) { currentProxySCMNodeId = newLeaderNodeId; @@ -220,7 +224,6 @@ private boolean assignLeaderToNode(String newLeaderNodeId) { LOG.debug("Failing over SCM proxy to nodeId: {}", newLeaderNodeId); return true; } - return false; } @@ -271,8 +274,8 @@ public RetryPolicy getRetryPolicy() { @Override public RetryAction shouldRetry(Exception e, int retry, int failover, boolean b) { - if (!SCMHAUtils.isRetriableWithNoFailoverException(e)) { - performFailoverToAssignedLeader(null); + if (!SCMHAUtils.checkRetriableWithNoFailoverException(e)) { + performFailoverToAssignedLeader(null, e); } return SCMHAUtils.getRetryAction(failover, retry, e, maxRetryCount, getRetryInterval()); diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMSecurityProtocolFailoverProxyProvider.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMSecurityProtocolFailoverProxyProvider.java index 13f1bc04c4ef..23f3a3e6cbea 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMSecurityProtocolFailoverProxyProvider.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMSecurityProtocolFailoverProxyProvider.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.conf.ConfigurationException; import org.apache.hadoop.hdds.conf.ConfigurationSource; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolPB; +import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo; import org.apache.hadoop.hdds.utils.HAUtils; @@ -99,7 +100,7 @@ public SCMSecurityProtocolFailoverProxyProvider(ConfigurationSource conf, this.retryInterval = scmClientConfig.getRetryInterval(); } - protected void loadConfigs() { + protected synchronized void loadConfigs() { List scmNodeInfoList = SCMNodeInfo.buildNodeInfo(conf); scmNodeIds = new ArrayList<>(); @@ -173,7 +174,7 @@ private SCMSecurityProtocolPB createSCMProxy(InetSocketAddress scmAddress) @Override - public void performFailover(SCMSecurityProtocolPB 
currentProxy) { + public synchronized void performFailover(SCMSecurityProtocolPB currentProxy) { if (LOG.isDebugEnabled()) { int currentIndex = getCurrentProxyIndex(); LOG.debug("Failing over SCM Security proxy to index: {}, nodeId: {}", @@ -181,10 +182,46 @@ public void performFailover(SCMSecurityProtocolPB currentProxy) { } } + public synchronized void performFailoverToAssignedLeader(String newLeader, + Exception e) { + ServerNotLeaderException snle = + (ServerNotLeaderException) SCMHAUtils.getServerNotLeaderException(e); + if (snle != null && snle.getSuggestedLeader() != null) { + newLeader = scmProxyInfoMap.values().stream().filter( + proxyInfo -> NetUtils.getHostPortString(proxyInfo.getAddress()) + .equals(snle.getSuggestedLeader())).findFirst().get().getNodeId(); + LOG.debug("Performing failover to suggested leader {}, nodeId {}", + snle.getSuggestedLeader(), newLeader); + } + if (newLeader == null) { + // If newLeader is not assigned, it will fail over to next proxy. + performFailoverToNextProxy(); + LOG.debug("Performing failover to next proxy node {}", + currentProxySCMNodeId); + } else { + if (!assignLeaderToNode(newLeader)) { + LOG.debug("Failing over SCM proxy to nodeId: {}", newLeader); + performFailoverToNextProxy(); + } + } + } + + private synchronized boolean assignLeaderToNode(String newLeaderNodeId) { + if (!currentProxySCMNodeId.equals(newLeaderNodeId) + && scmProxies.containsKey(newLeaderNodeId)) { + currentProxySCMNodeId = newLeaderNodeId; + currentProxyIndex = scmNodeIds.indexOf(currentProxySCMNodeId); + + LOG.debug("Failing over SCM proxy to nodeId: {}", newLeaderNodeId); + return true; + } + + return false; + } /** * Performs fail-over to the next proxy. */ - public void performFailoverToNextProxy() { + public synchronized void performFailoverToNextProxy() { int newProxyIndex = incrementProxyIndex(); if (LOG.isDebugEnabled()) { LOG.debug("Incrementing SCM Security proxy index to {}, nodeId: {}", @@ -228,11 +265,8 @@ public RetryAction shouldRetry(Exception exception, int retries, return RetryAction.FAIL; } - // Perform fail over to next proxy, as right now we don't have any - // suggested leader ID from server, we fail over to next one. - // TODO: Act based on server response if leader id is passed. 
- if (!SCMHAUtils.isRetriableWithNoFailoverException(exception)) { - performFailoverToNextProxy(); + if (!SCMHAUtils.checkRetriableWithNoFailoverException(exception)) { + performFailoverToAssignedLeader(null, exception); } return SCMHAUtils .getRetryAction(failovers, retries, exception, maxRetryCount, @@ -250,7 +284,7 @@ public Class< SCMSecurityProtocolPB > getInterface() { } @Override - public void close() throws IOException { + public synchronized void close() throws IOException { for (Map.Entry> proxy : scmProxies.entrySet()) { if (proxy.getValue() != null) { diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java index 112843181fba..b3f4c8dbbe6d 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java @@ -79,8 +79,7 @@ public final class HAUtils { private HAUtils() { } - public static ScmInfo getScmInfo(OzoneConfiguration conf) - throws IOException { + public static ScmInfo getScmInfo(OzoneConfiguration conf) throws IOException { OzoneConfiguration configuration = new OzoneConfiguration(conf); try { long duration = conf.getTimeDuration(OZONE_SCM_INFO_WAIT_DURATION, @@ -88,15 +87,13 @@ public static ScmInfo getScmInfo(OzoneConfiguration conf) SCMClientConfig scmClientConfig = configuration.getObject(SCMClientConfig.class); int retryCount = - (int) (duration / (scmClientConfig.getRetryInterval()/1000)); - + (int) (duration / (scmClientConfig.getRetryInterval() / 1000)); // If duration is set to lesser value, fall back to actual default // retry count. if (retryCount > scmClientConfig.getRetryCount()) { scmClientConfig.setRetryCount(retryCount); configuration.setFromObject(scmClientConfig); } - return getScmBlockClient(configuration).getScmInfo(); } catch (IOException e) { throw e; diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java index e3a255041828..d8a4d9475d00 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HddsServerUtil.java @@ -81,10 +81,10 @@ import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_RPC_RETRY_COUNT_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_RPC_RETRY_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_HEARTBEAT_RPC_RETRY_INTERVAL_DEFAULT; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION; -import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION_DEFAULT; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL_DEFAULT; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION; +import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION_DEFAULT; import static org.apache.hadoop.hdds.server.ServerUtils.sanitizeUserArgs; import org.rocksdb.RocksDBException; @@ -467,10 +467,9 @@ public static SCMSecurityProtocolClientSideTranslatorPB getScmSecurityClient( OzoneConfiguration configuration = new OzoneConfiguration(conf); long duration = conf.getTimeDuration(OZONE_SCM_INFO_WAIT_DURATION, 
OZONE_SCM_INFO_WAIT_DURATION_DEFAULT, TimeUnit.SECONDS); - SCMClientConfig scmClientConfig = - conf.getObject(SCMClientConfig.class); + SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class); int retryCount = - (int) (duration / (scmClientConfig.getRetryInterval()/1000)); + (int) (duration / (scmClientConfig.getRetryInterval() / 1000)); // If duration is set to lesser value, fall back to actual default // retry count. diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java index 75db7bf1c034..16394e6d63ad 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/RatisUtil.java @@ -18,18 +18,22 @@ package org.apache.hadoop.hdds.scm.ha; import com.google.common.base.Strings; +import com.google.protobuf.ServiceException; import org.apache.hadoop.hdds.conf.ConfigurationSource; +import org.apache.hadoop.hdds.ratis.ServerNotLeaderException; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.server.ServerUtils; import org.apache.ratis.RaftConfigKeys; import org.apache.ratis.conf.RaftProperties; import org.apache.ratis.grpc.GrpcConfigKeys; +import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.apache.ratis.rpc.RpcType; import org.apache.ratis.server.RaftServerConfigKeys; import org.apache.ratis.util.SizeInBytes; import org.apache.ratis.util.TimeDuration; import java.io.File; +import java.io.IOException; import java.util.Collections; import java.util.concurrent.TimeUnit; @@ -174,4 +178,18 @@ private static void setRaftSnapshotProperties( conf.getRatisSnapshotThreshold()); } + public static void checkRatisException(IOException e, String port, + String scmId) throws ServiceException { + if (SCMHAUtils.isNonRetriableException(e)) { + throw new ServiceException(new NonRetriableException(e)); + } else if (SCMHAUtils.isRetriableWithNoFailoverException(e)) { + throw new ServiceException(new RetriableWithNoFailoverException(e)); + } else if (SCMHAUtils.getNotLeaderException(e) != null) { + NotLeaderException nle = + (NotLeaderException) SCMHAUtils.getNotLeaderException(e); + throw new ServiceException(ServerNotLeaderException + .convertToNotLeaderException(nle, + SCMRatisServerImpl.getSelfPeerId(scmId), port)); + } + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java index 8b016d5b9434..4cbe25ffbec6 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java @@ -54,6 +54,7 @@ import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.apache.ratis.protocol.SetConfigurationRequest; import org.apache.ratis.server.RaftServer; +import org.apache.ratis.thirdparty.com.google.protobuf.ByteString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -246,8 +247,15 @@ public List getRatisRoles() throws IOException { */ @Override public NotLeaderException triggerNotLeaderException() { - return new NotLeaderException( - division.getMemberId(), null, division.getGroup().getPeers()); + ByteString leaderId = + division.getInfo().getRoleInfoProto().getFollowerInfo().getLeaderInfo() + .getId().getId(); + RaftPeer 
suggestedLeader = leaderId.isEmpty() ? + null : + division.getRaftConf().getPeer(RaftPeerId.valueOf(leaderId)); + return new NotLeaderException(division.getMemberId(), + suggestedLeader, + division.getGroup().getPeers()); } @Override @@ -289,7 +297,7 @@ private static RaftGroup buildRaftGroup(SCMNodeDetails details, String scmId, String clusterId) { Preconditions.checkNotNull(scmId); final RaftGroupId groupId = buildRaftGroupId(clusterId); - RaftPeerId selfPeerId = RaftPeerId.getRaftPeerId(scmId); + RaftPeerId selfPeerId = getSelfPeerId(scmId); RaftPeer localRaftPeer = RaftPeer.newBuilder().setId(selfPeerId) // TODO : Should we use IP instead of hostname?? @@ -303,6 +311,10 @@ private static RaftGroup buildRaftGroup(SCMNodeDetails details, return group; } + public static RaftPeerId getSelfPeerId(String scmId) { + return RaftPeerId.getRaftPeerId(scmId); + } + @VisibleForTesting public static RaftGroupId buildRaftGroupId(String clusterId) { Preconditions.checkNotNull(clusterId); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java index 6829f959e925..088f74d3845a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/SCMSecurityProtocolServerSideTranslatorPB.java @@ -37,8 +37,7 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMSecurityResponse; import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.Status; import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolPB; -import org.apache.hadoop.hdds.scm.ha.RetriableWithNoFailoverException; -import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; +import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.hdds.security.exception.SCMSecurityException; import org.apache.hadoop.hdds.security.x509.crl.CRLInfo; @@ -48,7 +47,6 @@ import com.google.protobuf.ProtocolMessageEnum; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; -import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -89,9 +87,9 @@ public SCMSecurityResponse submitRequest(RpcController controller, // primary SCM may not be leader SCM. 
if (!request.getCmdType().equals(GetSCMCertificate)) { if (!scm.checkLeader()) { - throw new ServiceException(scm.getScmHAManager() - .getRatisServer() - .triggerNotLeaderException()); + RatisUtil.checkRatisException( + scm.getScmHAManager().getRatisServer().triggerNotLeaderException(), + scm.getSecurityProtocolRpcPort(), scm.getScmId()); } } return dispatcher.processRequest(request, this::processRequest, @@ -148,11 +146,8 @@ public SCMSecurityResponse processRequest(SCMSecurityRequest request) "Unknown request type: " + request.getCmdType()); } } catch (IOException e) { - if (SCMHAUtils.isRetriableWithNoFailoverException(e)) { - throw new ServiceException(new RetriableWithNoFailoverException(e)); - } else if (e instanceof NotLeaderException) { - throw new ServiceException(e); - } + RatisUtil.checkRatisException(e, scm.getSecurityProtocolRpcPort(), + scm.getScmId()); scmSecurityResponse.setSuccess(false); scmSecurityResponse.setStatus(exceptionToResponseStatus(e)); // If actual cause is set in SCMSecurityException, set message with diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java index 950e6da925e4..1938afe94a5a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/ScmBlockLocationProtocolServerSideTranslatorPB.java @@ -42,8 +42,7 @@ import org.apache.hadoop.hdds.scm.container.common.helpers.AllocatedBlock; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.scm.exceptions.SCMException; -import org.apache.hadoop.hdds.scm.ha.RetriableWithNoFailoverException; -import org.apache.hadoop.hdds.scm.ha.SCMHAUtils; +import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolPB; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; @@ -55,7 +54,6 @@ import com.google.protobuf.ProtocolMessageEnum; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; -import org.apache.ratis.protocol.exceptions.NotLeaderException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -107,9 +105,9 @@ private SCMBlockLocationResponse.Builder createSCMBlockResponse( public SCMBlockLocationResponse send(RpcController controller, SCMBlockLocationRequest request) throws ServiceException { if (!scm.getScmContext().isLeader()) { - throw new ServiceException(scm.getScmHAManager() - .getRatisServer() - .triggerNotLeaderException()); + RatisUtil.checkRatisException( + scm.getScmHAManager().getRatisServer().triggerNotLeaderException(), + scm.getBlockProtocolRpcPort(), scm.getScmId()); } return dispatcher.processRequest( request, @@ -155,11 +153,8 @@ private SCMBlockLocationResponse processMessage( " in ScmBlockLocationProtocol"); } } catch (IOException e) { - if (SCMHAUtils.isRetriableWithNoFailoverException(e)) { - throw new ServiceException(new RetriableWithNoFailoverException(e)); - } else if (e instanceof NotLeaderException) { - throw new ServiceException(e); - } + RatisUtil.checkRatisException(e, scm.getBlockProtocolRpcPort(), + scm.getScmId()); response.setSuccess(false); 
response.setStatus(exceptionToResponseStatus(e)); if (e.getMessage() != null) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java index 68046fe28e78..6690c7f67e19 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java @@ -78,6 +78,7 @@ import org.apache.hadoop.hdds.scm.ScmInfo; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline; +import org.apache.hadoop.hdds.scm.ha.RatisUtil; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolPB; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; @@ -140,9 +141,9 @@ public ScmContainerLocationResponse submitRequest(RpcController controller, // not leader or not belong to admin command. if (!scm.getScmContext().isLeader() && !ADMIN_COMMAND_TYPE.contains(request.getCmdType())) { - throw new ServiceException(scm.getScmHAManager() - .getRatisServer() - .triggerNotLeaderException()); + RatisUtil.checkRatisException( + scm.getScmHAManager().getRatisServer().triggerNotLeaderException(), + scm.getClientRpcPort(), scm.getScmId()); } return dispatcher .processRequest(request, this::processRequest, request.getCmdType(), @@ -329,6 +330,8 @@ public ScmContainerLocationResponse processRequest( "Unknown command type: " + request.getCmdType()); } } catch (IOException e) { + RatisUtil + .checkRatisException(e, scm.getClientRpcPort(), scm.getScmId()); throw new ServiceException(e); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 30df2c08aad3..13ee92161a1d 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -1098,6 +1098,16 @@ public String getClientRpcPort() { return addr == null ? "0" : Integer.toString(addr.getPort()); } + public String getBlockProtocolRpcPort() { + InetSocketAddress addr = getBlockProtocolServer().getBlockRpcAddress(); + return addr == null ? "0" : Integer.toString(addr.getPort()); + } + + public String getSecurityProtocolRpcPort() { + InetSocketAddress addr = getSecurityProtocolServer().getRpcAddress(); + return addr == null ? "0" : Integer.toString(addr.getPort()); + } + /** * Returns listening address of StorageDatanode Protocol RPC server. 
* diff --git a/hadoop-ozone/dist/src/main/compose/compatibility/docker-config b/hadoop-ozone/dist/src/main/compose/compatibility/docker-config index 940f5e9818e2..3fd4ca02eced 100644 --- a/hadoop-ozone/dist/src/main/compose/compatibility/docker-config +++ b/hadoop-ozone/dist/src/main/compose/compatibility/docker-config @@ -27,6 +27,7 @@ OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_ozone.recon.address=recon:9891 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s HADOOP_OPTS="-Dhadoop.opts=test" HDFS_STORAGECONTAINERMANAGER_OPTS="-Dhdfs.scm.opts=test" diff --git a/hadoop-ozone/dist/src/main/compose/ozone-csi/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-csi/docker-config index 1d1db1365a65..2488153c0974 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-csi/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-csi/docker-config @@ -31,6 +31,7 @@ OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/ozone-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-ha/docker-config index 70895a0d3fc0..bdc0c4331ccf 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-ha/docker-config @@ -34,6 +34,7 @@ OZONE-SITE.XML_ozone.scm.container.size=1GB OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config index cd8ea0c47c08..34c241cff53e 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config @@ -33,6 +33,7 @@ OZONE-SITE.XML_ozone.replication=1 OZONE-SITE.XML_ozone.client.failover.max.attempts=6 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_hdds.profiler.endpoint.enabled=true +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 ASYNC_PROFILER_HOME=/opt/profiler diff --git a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-config index ff78482573ea..e46ca589684f 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-topology/docker-config @@ -35,6 +35,7 @@ OZONE-SITE.XML_ozone.scm.container.placement.impl=org.apache.hadoop.hdds.scm.con OZONE-SITE.XML_net.topology.node.switch.mapping.impl=org.apache.hadoop.net.TableMapping OZONE-SITE.XML_net.topology.table.file.name=/opt/hadoop/compose/ozone-topology/network-config OZONE-SITE.XML_ozone.network.topology.aware.read=true +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 diff --git a/hadoop-ozone/dist/src/main/compose/ozone/docker-config b/hadoop-ozone/dist/src/main/compose/ozone/docker-config index 
47112238424d..c9669a1b9fdf 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone/docker-config @@ -32,6 +32,7 @@ OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_ozone.recon.address=recon:9891 OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config index 1df06e419bb8..762bfeaa4b32 100644 --- a/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozoneblockade/docker-config @@ -37,6 +37,7 @@ OZONE-SITE.XML_hdds.scm.replication.thread.interval=6s OZONE-SITE.XML_hdds.scm.replication.event.timeout=10s OZONE-SITE.XML_dfs.ratis.server.failure.duration=35s OZONE-SITE.XML_hdds.scm.safemode.min.datanode=3 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 diff --git a/hadoop-ozone/dist/src/main/compose/ozones3-haproxy/docker-config b/hadoop-ozone/dist/src/main/compose/ozones3-haproxy/docker-config index 73f775bf39da..711ac7c1c564 100644 --- a/hadoop-ozone/dist/src/main/compose/ozones3-haproxy/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozones3-haproxy/docker-config @@ -27,6 +27,7 @@ OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_ozone.replication=3 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 diff --git a/hadoop-ozone/dist/src/main/compose/ozonescripts/docker-config b/hadoop-ozone/dist/src/main/compose/ozonescripts/docker-config index 45c0c2dbc824..101236b18952 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonescripts/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonescripts/docker-config @@ -25,6 +25,7 @@ OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_ozone.replication=1 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config index 6e67b4072f7c..ae028efcca36 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config @@ -47,6 +47,7 @@ OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.block.token.enabled=true OZONE-SITE.XML_hdds.grpc.tls.enabled=true OZONE-SITE.XML_ozone.replication=3 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config index 2f201478e7fa..14f88c279255 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-mr/docker-config @@ -27,6 +27,7 @@ OZONE-SITE.XML_ozone.handler.type=distributed 
OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.block.token.enabled=true OZONE-SITE.XML_ozone.replication=3 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE-SITE.XML_hdds.scm.kerberos.principal=scm/scm@EXAMPLE.COM OZONE-SITE.XML_hdds.scm.kerberos.keytab.file=/etc/security/keytabs/scm.keytab OZONE-SITE.XML_ozone.om.kerberos.principal=om/om@EXAMPLE.COM diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure/docker-config index c8d3643e9736..1b45367d6199 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure/docker-config @@ -32,6 +32,7 @@ OZONE-SITE.XML_hdds.block.token.enabled=true OZONE-SITE.XML_hdds.grpc.tls.enabled=true OZONE-SITE.XML_ozone.replication=3 OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon diff --git a/hadoop-ozone/dist/src/main/compose/restart/docker-config b/hadoop-ozone/dist/src/main/compose/restart/docker-config index 83530bb334e0..f3a46541e99c 100644 --- a/hadoop-ozone/dist/src/main/compose/restart/docker-config +++ b/hadoop-ozone/dist/src/main/compose/restart/docker-config @@ -28,6 +28,7 @@ OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_ozone.recon.address=recon:9891 OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/docker-config b/hadoop-ozone/dist/src/main/compose/upgrade/docker-config index 0a489ae9c6d6..e55477d73644 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/docker-config +++ b/hadoop-ozone/dist/src/main/compose/upgrade/docker-config @@ -30,6 +30,7 @@ OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_ozone.recon.address=recon:9891 OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/docker-config b/hadoop-ozone/dist/src/main/compose/xcompat/docker-config index 5519807176c9..0599498cf238 100644 --- a/hadoop-ozone/dist/src/main/compose/xcompat/docker-config +++ b/hadoop-ozone/dist/src/main/compose/xcompat/docker-config @@ -29,5 +29,6 @@ OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 OZONE-SITE.XML_recon.om.snapshot.task.interval.delay=1m +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s no_proxy=om,recon,scm,s3g,kdc,localhost,127.0.0.1 diff --git a/hadoop-ozone/fault-injection-test/network-tests/src/test/compose/docker-config b/hadoop-ozone/fault-injection-test/network-tests/src/test/compose/docker-config index da46f8e0b3ab..eabc95c812a6 100644 --- a/hadoop-ozone/fault-injection-test/network-tests/src/test/compose/docker-config +++ b/hadoop-ozone/fault-injection-test/network-tests/src/test/compose/docker-config @@ -25,6 +25,7 @@ OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_ozone.scm.dead.node.interval=5m OZONE-SITE.XML_ozone.replication=1 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s 
HDFS-SITE.XML_rpc.metrics.quantile.enable=true HDFS-SITE.XML_rpc.metrics.percentiles.intervals=60,300 LOG4J.PROPERTIES_log4j.rootLogger=INFO, stdout diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index 40d99a798f3c..2da2211d8c42 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMRatisServerImpl; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig; import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider; import org.apache.hadoop.hdds.scm.safemode.HealthyPipelineSafeModeRule; import org.apache.hadoop.hdds.scm.server.OzoneStorageContainerManager; @@ -675,6 +676,10 @@ protected void initializeConfiguration() throws IOException { pipelineNumLimit : DEFAULT_PIPELIME_LIMIT); conf.setTimeDuration(OMConfigKeys.OZONE_OM_RATIS_MINIMUM_TIMEOUT_KEY, DEFAULT_RATIS_RPC_TIMEOUT_SEC, TimeUnit.SECONDS); + SCMClientConfig scmClientConfig = conf.getObject(SCMClientConfig.class); + // default max retry timeout set to 30s + scmClientConfig.setMaxRetryTimeout(30 * 1000); + conf.setFromObject(scmClientConfig); configureTrace(); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestFailoverWithSCMHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestFailoverWithSCMHA.java new file mode 100644 index 000000000000..1f0e71fbcd7a --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/scm/TestFailoverWithSCMHA.java @@ -0,0 +1,147 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.scm;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.scm.ha.SCMHAConfiguration;
+import org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol;
+import org.apache.hadoop.hdds.scm.protocol.StorageContainerLocationProtocol;
+import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
+import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
+import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
+import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.hdds.tracing.TracingUtil;
+import org.apache.hadoop.ozone.MiniOzoneCluster;
+import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.Assert;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.slf4j.event.Level;
+
+import java.io.IOException;
+import java.util.UUID;
+
+/**
+ * Tests failover with SCM HA setup.
+ */
+public class TestFailoverWithSCMHA {
+  private MiniOzoneHAClusterImpl cluster = null;
+  private OzoneConfiguration conf;
+  private String clusterId;
+  private String scmId;
+  private String omServiceId;
+  private String scmServiceId;
+  private int numOfOMs = 1;
+  private int numOfSCMs = 3;
+
+  private static final long SNAPSHOT_THRESHOLD = 5;
+
+  /**
+   * Create a MiniOzoneCluster for testing.
+   *
+   * @throws IOException
+   */
+  @BeforeEach
+  public void init() throws Exception {
+    conf = new OzoneConfiguration();
+    clusterId = UUID.randomUUID().toString();
+    scmId = UUID.randomUUID().toString();
+    omServiceId = "om-service-test1";
+    scmServiceId = "scm-service-test1";
+    SCMHAConfiguration scmhaConfiguration =
+        conf.getObject(SCMHAConfiguration.class);
+    scmhaConfiguration.setRatisSnapshotThreshold(SNAPSHOT_THRESHOLD);
+    conf.setFromObject(scmhaConfiguration);
+
+    cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newHABuilder(conf)
+        .setClusterId(clusterId).setScmId(scmId).setOMServiceId(omServiceId)
+        .setSCMServiceId(scmServiceId).setNumOfOzoneManagers(numOfOMs)
+        .setNumOfStorageContainerManagers(numOfSCMs).setNumOfActiveSCMs(3)
+        .build();
+    cluster.waitForClusterToBeReady();
+  }
+
+  /**
+   * Shutdown the MiniOzoneHAClusterImpl.
+   */
+  @AfterEach
+  public void shutdown() {
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testFailover() throws Exception {
+    SCMClientConfig scmClientConfig =
+        conf.getObject(SCMClientConfig.class);
+    scmClientConfig.setRetryCount(1);
+    scmClientConfig.setRetryInterval(100);
+    scmClientConfig.setMaxRetryTimeout(1500);
+    // 1500 ms max retry timeout / 100 ms retry interval = 15, which
+    // overrides the configured retry count of 1.
+    Assert.assertEquals(15, scmClientConfig.getRetryCount());
+    conf.setFromObject(scmClientConfig);
+    StorageContainerManager scm = getLeader(cluster);
+    Assert.assertNotNull(scm);
+    SCMBlockLocationFailoverProxyProvider failoverProxyProvider =
+        new SCMBlockLocationFailoverProxyProvider(conf);
+    // Point the provider at the leader; changeCurrentProxy then advances
+    // to the next node, so the first call hits a follower and must fail
+    // over using the leader suggested by the server.
+    failoverProxyProvider.changeCurrentProxy(scm.getSCMNodeId());
+    ScmBlockLocationProtocolClientSideTranslatorPB scmBlockLocationClient =
+        new ScmBlockLocationProtocolClientSideTranslatorPB(
+            failoverProxyProvider);
+    GenericTestUtils
+        .setLogLevel(SCMBlockLocationFailoverProxyProvider.LOG, Level.DEBUG);
+    GenericTestUtils.LogCapturer logCapture = GenericTestUtils.LogCapturer
+        .captureLogs(SCMBlockLocationFailoverProxyProvider.LOG);
+    ScmBlockLocationProtocol scmBlockLocationProtocol = TracingUtil
+        .createProxy(scmBlockLocationClient, ScmBlockLocationProtocol.class,
+            conf);
+    scmBlockLocationProtocol.getScmInfo();
+    Assert.assertTrue(logCapture.getOutput()
+        .contains("Performing failover to suggested leader"));
+    scm = getLeader(cluster);
+    SCMContainerLocationFailoverProxyProvider proxyProvider =
+        new SCMContainerLocationFailoverProxyProvider(conf);
+    GenericTestUtils.setLogLevel(SCMContainerLocationFailoverProxyProvider.LOG,
+        Level.DEBUG);
+    logCapture = GenericTestUtils.LogCapturer
+        .captureLogs(SCMContainerLocationFailoverProxyProvider.LOG);
+    proxyProvider.changeCurrentProxy(scm.getSCMNodeId());
+    StorageContainerLocationProtocol scmContainerClient =
+        TracingUtil.createProxy(
+            new StorageContainerLocationProtocolClientSideTranslatorPB(
+                proxyProvider), StorageContainerLocationProtocol.class, conf);
+
+    scmContainerClient.allocateContainer(HddsProtos.ReplicationType.RATIS,
+        HddsProtos.ReplicationFactor.ONE, "ozone");
+    Assert.assertTrue(logCapture.getOutput()
+        .contains("Performing failover to suggested leader"));
+  }
+
+  static StorageContainerManager getLeader(MiniOzoneHAClusterImpl impl) {
+    for (StorageContainerManager scm : impl.getStorageContainerManagers()) {
+      if (scm.checkLeader()) {
+        return scm;
+      }
+    }
+    return null;
+  }
+}
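A note on the design of ServerNotLeaderException above: the current peer and the suggested leader are deliberately encoded in the exception message itself, because only the message text survives Hadoop RPC's RemoteException wrapping; the String constructor re-parses it on the client using the two patterns. A minimal sketch of that round trip, assuming only the class added by this patch:

import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
import org.apache.ratis.protocol.RaftPeerId;

public class ServerNotLeaderRoundTrip {
  public static void main(String[] args) {
    // Server side: the leader is known, so it is embedded in the message.
    ServerNotLeaderException serverSide = new ServerNotLeaderException(
        RaftPeerId.valueOf("scm1"), "scm2:9863");

    // Client side: only the message text crosses the RPC boundary.
    ServerNotLeaderException clientSide =
        new ServerNotLeaderException(serverSide.getMessage());

    // Prints "scm2:9863": the suggestion survived the round trip.
    System.out.println(clientSide.getSuggestedLeader());
  }
}

Note that SUGGESTED_LEADER_PATTERN captures [^.]*, which is why the sketch uses a dot-free host name; a dotted host such as scm2.example.com would be truncated at its first dot.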
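The retry path is easiest to read end to end: the server-side translators call RatisUtil.checkRatisException to map Ratis failures onto three wire-visible categories (NonRetriableException, RetriableWithNoFailoverException, ServerNotLeaderException), and each client-side proxy provider first calls performFailoverToAssignedLeader(...) for failover-worthy errors and then delegates the retry decision to SCMHAUtils.getRetryAction. A hedged sketch of a policy built on it; the anonymous-class shape mirrors the providers in this patch, and the constants are illustrative:

import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.io.retry.RetryPolicy;

public final class ScmRetryPolicySketch {
  private ScmRetryPolicySketch() {
  }

  public static RetryPolicy newPolicy() {
    return new RetryPolicy() {
      @Override
      public RetryAction shouldRetry(Exception e, int retries, int failovers,
          boolean isIdempotentOrAtMostOnce) {
        // RetriableWithNoFailoverException -> RETRY against the same SCM;
        // NonRetriableException            -> FAIL immediately;
        // anything else, e.g. ServerNotLeaderException
        //                                  -> FAILOVER_AND_RETRY.
        return SCMHAUtils.getRetryAction(failovers, retries, e,
            15 /* maxRetryCount */, 1000 /* retryInterval, ms */);
      }
    };
  }
}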
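All three proxy providers repeat the same suggested-leader lookup: SCMHAUtils.getServerNotLeaderException extracts the typed exception from the failure, and its host:port suggestion is mapped back to a nodeId through scmProxyInfoMap. One caveat worth flagging: the patch uses findFirst().get(), which throws NoSuchElementException when the suggestion matches no configured proxy. A defensive variant of the shared lookup, assuming the SCMProxyInfo accessors the patch already relies on (the null fallback to round-robin failover is an assumption, not the patch's behaviour):

import java.util.Map;
import java.util.Optional;

import org.apache.hadoop.hdds.ratis.ServerNotLeaderException;
import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.scm.proxy.SCMProxyInfo;
import org.apache.hadoop.net.NetUtils;

final class SuggestedLeaderResolver {
  private SuggestedLeaderResolver() {
  }

  /** Returns the nodeId to fail over to, or null to use round robin. */
  static String resolve(Map<String, SCMProxyInfo> scmProxyInfoMap,
      Exception failure) {
    ServerNotLeaderException snle = (ServerNotLeaderException)
        SCMHAUtils.getServerNotLeaderException(failure);
    if (snle == null || snle.getSuggestedLeader() == null) {
      return null;
    }
    Optional<SCMProxyInfo> match = scmProxyInfoMap.values().stream()
        .filter(info -> NetUtils.getHostPortString(info.getAddress())
            .equals(snle.getSuggestedLeader()))
        .findFirst();
    // Unlike findFirst().get(), an unknown suggestion degrades to a
    // round-robin failover instead of throwing NoSuchElementException.
    return match.map(SCMProxyInfo::getNodeId).orElse(null);
  }
}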
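Finally, the retry budget introduced by hdds.scmclient.max.retry.timeout: SCMClientConfig.getRetryCount() divides the timeout by the failover retry interval and keeps whichever of that quotient and hdds.scmclient.failover.max.retry is larger; the compose files pin the timeout to 30s so that a downed SCM bounds test runtime. The arithmetic, using the values TestFailoverWithSCMHA sets:

public class RetryBudgetSketch {
  public static void main(String[] args) {
    long maxRetryTimeoutMs = 1500;  // hdds.scmclient.max.retry.timeout
    long retryIntervalMs = 100;     // hdds.scmclient.failover.retry.interval
    int configuredRetryCount = 1;   // hdds.scmclient.failover.max.retry

    // SCMClientConfig.getRetryCount() keeps the larger of the two counts.
    int fromTimeout = (int) (maxRetryTimeoutMs / retryIntervalMs);
    int effective = Math.max(fromTimeout, configuredRetryCount);

    System.out.println(effective);  // 15, the value the test asserts
  }
}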