diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh index cbeb81987e70d..3905c5a63b4de 100755 --- a/dev-support/test-patch.sh +++ b/dev-support/test-patch.sh @@ -454,7 +454,7 @@ checkJavadocWarnings () { JIRA_COMMENT="$JIRA_COMMENT {color:red}-1 javadoc{color}. The javadoc tool appears to have generated `expr $(($numPatchJavadocWarnings-$numTrunkJavadocWarnings))` warning messages. - See $BUILD_URL/artifact/trunk/patchprocess/diffJavadocWarnings.txt for details." + See $BUILD_URL/artifact/PreCommit-HADOOP-Build-patchprocess/diffJavadocWarnings.txt for details." return 1 fi fi @@ -498,7 +498,7 @@ checkJavacWarnings () { {color:red}-1 javac{color}. The applied patch generated $patchJavacWarnings javac compiler warnings (more than the trunk's current $trunkJavacWarnings warnings)." $DIFF $PATCH_DIR/filteredTrunkJavacWarnings.txt $PATCH_DIR/filteredPatchJavacWarnings.txt > $PATCH_DIR/diffJavacWarnings.txt - JIRA_COMMENT_FOOTER="Javac warnings: $BUILD_URL/artifact/trunk/patchprocess/diffJavacWarnings.txt + JIRA_COMMENT_FOOTER="Javac warnings: $BUILD_URL/artifact/PreCommit-HADOOP-Build-patchprocess/diffJavacWarnings.txt $JIRA_COMMENT_FOOTER" return 1 @@ -540,7 +540,7 @@ checkReleaseAuditWarnings () { {color:red}-1 release audit{color}. The applied patch generated $patchReleaseAuditWarnings release audit warnings." $GREP '\!?????' $PATCH_DIR/patchReleaseAuditWarnings.txt > $PATCH_DIR/patchReleaseAuditProblems.txt echo "Lines that start with ????? in the release audit report indicate files that do not have an Apache license header." 
>> $PATCH_DIR/patchReleaseAuditProblems.txt - JIRA_COMMENT_FOOTER="Release audit warnings: $BUILD_URL/artifact/trunk/patchprocess/patchReleaseAuditProblems.txt + JIRA_COMMENT_FOOTER="Release audit warnings: $BUILD_URL/artifact/PreCommit-HADOOP-Build-patchprocess/patchReleaseAuditProblems.txt $JIRA_COMMENT_FOOTER" return 1 fi @@ -659,7 +659,7 @@ checkFindbugsWarnings () { $PATCH_DIR/newPatchFindbugsWarnings${module_suffix}.xml \ $PATCH_DIR/newPatchFindbugsWarnings${module_suffix}.html if [[ $newFindbugsWarnings > 0 ]] ; then - JIRA_COMMENT_FOOTER="Findbugs warnings: $BUILD_URL/artifact/trunk/patchprocess/newPatchFindbugsWarnings${module_suffix}.html + JIRA_COMMENT_FOOTER="Findbugs warnings: $BUILD_URL/artifact/PreCommit-HADOOP-Build-patchprocess/newPatchFindbugsWarnings${module_suffix}.html $JIRA_COMMENT_FOOTER" fi done diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java index a17b6d495dc58..6c0fbbb0a26ee 100644 --- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java @@ -197,6 +197,8 @@ public void init(Properties config, ServletContext servletContext, client = (CuratorFramework) curatorClientObj; } else { client = createCuratorClient(config); + servletContext.setAttribute( + ZOOKEEPER_SIGNER_SECRET_PROVIDER_CURATOR_CLIENT_ATTRIBUTE, client); } this.tokenValidity = tokenValidity; shouldDisconnect = Boolean.parseBoolean( diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index f0fcab5580f51..e99a19d9e3b70 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ 
b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -530,6 +530,16 @@ Release 2.6.0 - UNRELEASED HADOOP-10922. User documentation for CredentialShell. (Larry McCay via wang) + HADOOP-11016. KMS should support signing cookies with zookeeper secret + manager. (tucu) + + HADOOP-11106. Document considerations of HAR and Encryption. (clamb via wang) + + HADOOP-10970. Cleanup KMS configuration keys. (wang) + + HADOOP-11017. KMS delegation token secret manager should be able to use + zookeeper as store. (asuresh via tucu) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. (James Thomas via todd) @@ -584,6 +594,10 @@ Release 2.6.0 - UNRELEASED HADOOP-10833. Remove unused cache in UserProvider. (Benoy Antony) + HADOOP-11112. TestKMSWithZK does not use KEY_PROVIDER_URI. (tucu via wang) + + HADOOP-11111 MiniKDC to use locale EN_US for case conversions. (stevel) + BUG FIXES HADOOP-10781. Unportable getgrouplist() usage breaks FreeBSD (Dmitry @@ -721,8 +735,8 @@ Release 2.6.0 - UNRELEASED HADOOP-11056. OsSecureRandom.setConf() might leak file descriptors (yzhang via cmccabe) - HDFS-6912. SharedFileDescriptorFactory should not allocate sparse files - (cmccabe) + HADOOP-11040. Return value of read(ByteBuffer buf) in CryptoInputStream is + incorrect in some cases. (Yi Liu via wang) BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS @@ -824,6 +838,21 @@ Release 2.6.0 - UNRELEASED HADOOP-11062. CryptoCodec testcases requiring OpenSSL should be run only if -Pnative is used. (asuresh via tucu) + HADOOP-11099. KMS return HTTP UNAUTHORIZED 401 on ACL failure. (tucu) + + HADOOP-11105. MetricsSystemImpl could leak memory in registered callbacks. + (Chuan Liu via cnauroth) + + HADOOP-10982. KMS: Support for multiple Kerberos principals. (tucu) + + HADOOP-11109. Site build is broken. (Jian He via atm) + + HADOOP-10946. Fix a bunch of typos in log messages (Ray Chiang via aw) + + HADOOP-10131. 
NetWorkTopology#countNumOfAvailableNodes() is returning + wrong value if excluded nodes passed are not part of the cluster tree + (vinayakumarb) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 0183e292c8a5d..32e95258a1068 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -218,6 +218,19 @@ com.jcraft jsch + + org.apache.curator + curator-test + test + + + org.apache.curator + curator-client + + + org.apache.curator + curator-recipes + com.google.code.findbugs jsr305 diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index 754601a19db75..202098d9774fe 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -328,6 +328,15 @@ esac # # export HADOOP_BALANCER_OPTS="" +### +# HDFS Mover specific parameters +### +# Specify the JVM options to be used when starting the HDFS Mover. +# These options will be appended to the options specified as HADOOP_OPTS +# and therefore may override any similar flags set in HADOOP_OPTS +# +# export HADOOP_MOVER_OPTS="" + ### # Advanced Users Only! 
### diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java index e8964ed6ed5b3..68e969737c5e2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java @@ -471,7 +471,16 @@ public int read(ByteBuffer buf) throws IOException { streamOffset += n; // Read n bytes decrypt(buf, n, pos); } - return n; + + if (n >= 0) { + return unread + n; + } else { + if (unread == 0) { + return -1; + } else { + return unread; + } + } } throw new UnsupportedOperationException("ByteBuffer read unsupported " + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java index 6ca0425b5517d..ce99d795f35ce 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java @@ -46,7 +46,8 @@ public abstract KeyProvider createProvider(URI providerName, ) throws IOException; private static final ServiceLoader serviceLoader = - ServiceLoader.load(KeyProviderFactory.class); + ServiceLoader.load(KeyProviderFactory.class, + KeyProviderFactory.class.getClassLoader()); // Iterate through the serviceLoader to avoid lazy loading. // Lazy loading would require synchronization in concurrent use cases. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index 899b6c44dc795..a97463ac88156 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -45,6 +45,7 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; +import java.lang.reflect.UndeclaredThrowableException; import java.net.HttpURLConnection; import java.net.SocketTimeoutException; import java.net.URI; @@ -400,6 +401,8 @@ public HttpURLConnection run() throws Exception { }); } catch (IOException ex) { throw ex; + } catch (UndeclaredThrowableException ex) { + throw new IOException(ex.getUndeclaredThrowable()); } catch (Exception ex) { throw new IOException(ex); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java index f960233fb7825..641709d98a36e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java @@ -34,6 +34,7 @@ public class FileEncryptionInfo { private final CipherSuite cipherSuite; private final byte[] edek; private final byte[] iv; + private final String keyName; private final String ezKeyVersionName; /** @@ -42,14 +43,16 @@ public class FileEncryptionInfo { * @param suite CipherSuite used to encrypt the file * @param edek encrypted data encryption key (EDEK) of the file * @param iv initialization vector (IV) used to encrypt the file + * @param keyName name of the key used for the encryption zone * @param 
ezKeyVersionName name of the KeyVersion used to encrypt the * encrypted data encryption key. */ public FileEncryptionInfo(final CipherSuite suite, final byte[] edek, - final byte[] iv, final String ezKeyVersionName) { + final byte[] iv, final String keyName, final String ezKeyVersionName) { checkNotNull(suite); checkNotNull(edek); checkNotNull(iv); + checkNotNull(keyName); checkNotNull(ezKeyVersionName); checkArgument(edek.length == suite.getAlgorithmBlockSize(), "Unexpected key length"); @@ -58,6 +61,7 @@ public FileEncryptionInfo(final CipherSuite suite, final byte[] edek, this.cipherSuite = suite; this.edek = edek; this.iv = iv; + this.keyName = keyName; this.ezKeyVersionName = ezKeyVersionName; } @@ -83,6 +87,11 @@ public byte[] getIV() { return iv; } + /** + * @return name of the encryption zone key. + */ + public String getKeyName() { return keyName; } + /** * @return name of the encryption zone KeyVersion used to encrypt the * encrypted data encryption key (EDEK). @@ -95,6 +104,7 @@ public String toString() { builder.append("cipherSuite: " + cipherSuite); builder.append(", edek: " + Hex.encodeHexString(edek)); builder.append(", iv: " + Hex.encodeHexString(iv)); + builder.append(", keyName: " + keyName); builder.append(", ezKeyVersionName: " + ezKeyVersionName); builder.append("}"); return builder.toString(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java index b261f7fdedf4b..da3807d307355 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -200,6 +200,15 @@ public long getAccessTime() { public FsPermission getPermission() { return permission; } + + /** + * Tell whether the underlying file or directory is encrypted or not. 
+ * + * @return true if the underlying file is encrypted. + */ + public boolean isEncrypted() { + return permission.getEncryptedBit(); + } /** * Get the owner of the file. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java index ee84437d8e17d..264a095270632 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/FsPermission.java @@ -294,6 +294,13 @@ public boolean getAclBit() { return false; } + /** + * Returns true if the file is encrypted or directory is in an encryption zone + */ + public boolean getEncryptedBit() { + return false; + } + /** Set the user file creation mask (umask) */ public static void setUMask(Configuration conf, FsPermission umask) { conf.set(UMASK_LABEL, String.format("%1$03o", umask.toShort())); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionMismatchException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionMismatchException.java index a72be58832dd4..015c15e8a6d16 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionMismatchException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/VersionMismatchException.java @@ -41,7 +41,7 @@ public VersionMismatchException(byte expectedVersionIn, byte foundVersionIn){ /** Returns a string representation of this object. */ @Override public String toString(){ - return "A record version mismatch occured. Expecting v" + return "A record version mismatch occurred. 
Expecting v" + expectedVersion + ", found v" + foundVersion; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 2f482c290edc2..84fe5523ec872 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -687,7 +687,8 @@ public Object run() throws IOException, InterruptedException { * a header to the server and starts * the connection thread that waits for responses. */ - private synchronized void setupIOstreams() { + private synchronized void setupIOstreams( + AtomicBoolean fallbackToSimpleAuth) { if (socket != null || shouldCloseConnection.get()) { return; } @@ -738,11 +739,18 @@ public AuthMethod run() remoteId.saslQop = (String)saslRpcClient.getNegotiatedProperty(Sasl.QOP); LOG.debug("Negotiated QOP is :" + remoteId.saslQop); - } else if (UserGroupInformation.isSecurityEnabled() && - !fallbackAllowed) { - throw new IOException("Server asks us to fall back to SIMPLE " + - "auth, but this client is configured to only allow secure " + - "connections."); + if (fallbackToSimpleAuth != null) { + fallbackToSimpleAuth.set(false); + } + } else if (UserGroupInformation.isSecurityEnabled()) { + if (!fallbackAllowed) { + throw new IOException("Server asks us to fall back to SIMPLE " + + "auth, but this client is configured to only allow secure " + + "connections."); + } + if (fallbackToSimpleAuth != null) { + fallbackToSimpleAuth.set(true); + } } } @@ -1375,6 +1383,26 @@ public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, /** * Make a call, passing rpcRequest, to the IPC server defined by * remoteId, returning the rpc respond. 
+ * + * @param rpcKind + * @param rpcRequest - contains serialized method and method parameters + * @param remoteId - the target rpc server + * @param fallbackToSimpleAuth - set to true or false during this method to + * indicate if a secure client falls back to simple auth + * @return the rpc response + * Throws exceptions if there are network problems or if the remote code + * threw an exception. + */ + public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, + ConnectionId remoteId, AtomicBoolean fallbackToSimpleAuth) + throws IOException { + return call(rpcKind, rpcRequest, remoteId, RPC.RPC_SERVICE_CLASS_DEFAULT, + fallbackToSimpleAuth); + } + + /** + * Make a call, passing rpcRequest, to the IPC server defined by + * remoteId, returning the rpc response. * * @param rpcKind * @param rpcRequest - contains serialized method and method parameters @@ -1386,8 +1414,29 @@ public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, */ public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, ConnectionId remoteId, int serviceClass) throws IOException { + return call(rpcKind, rpcRequest, remoteId, serviceClass, null); + } + + /** + * Make a call, passing rpcRequest, to the IPC server defined by + * remoteId, returning the rpc response. + * + * @param rpcKind + * @param rpcRequest - contains serialized method and method parameters + * @param remoteId - the target rpc server + * @param serviceClass - service class for RPC + * @param fallbackToSimpleAuth - set to true or false during this method to + * indicate if a secure client falls back to simple auth + * @return the rpc response + * Throws exceptions if there are network problems or if the remote code + * threw an exception. 
+ */ + public Writable call(RPC.RpcKind rpcKind, Writable rpcRequest, + ConnectionId remoteId, int serviceClass, + AtomicBoolean fallbackToSimpleAuth) throws IOException { final Call call = createCall(rpcKind, rpcRequest); - Connection connection = getConnection(remoteId, call, serviceClass); + Connection connection = getConnection(remoteId, call, serviceClass, + fallbackToSimpleAuth); try { connection.sendRpcRequest(call); // send the rpc request } catch (RejectedExecutionException e) { @@ -1444,7 +1493,8 @@ Set getConnectionIds() { /** Get a connection from the pool, or create a new one and add it to the * pool. Connections to a given ConnectionId are reused. */ private Connection getConnection(ConnectionId remoteId, - Call call, int serviceClass) throws IOException { + Call call, int serviceClass, AtomicBoolean fallbackToSimpleAuth) + throws IOException { if (!running.get()) { // the client is stopped throw new IOException("The client is stopped"); @@ -1468,7 +1518,7 @@ private Connection getConnection(ConnectionId remoteId, //block above. The reason for that is if the server happens to be slow, //it will take longer to establish a connection and that will slow the //entire system down. 
- connection.setupIOstreams(); + connection.setupIOstreams(fallbackToSimpleAuth); return connection; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 0ccdb71d0ee1e..124d835ab15f8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -27,6 +27,7 @@ import java.net.InetSocketAddress; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.SocketFactory; @@ -84,14 +85,23 @@ public ProtocolProxy getProxy(Class protocol, long clientVersion, } @Override - @SuppressWarnings("unchecked") public ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy ) throws IOException { + return getProxy(protocol, clientVersion, addr, ticket, conf, factory, + rpcTimeout, connectionRetryPolicy, null); + } + + @Override + @SuppressWarnings("unchecked") + public ProtocolProxy getProxy(Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) throws IOException { final Invoker invoker = new Invoker(protocol, addr, ticket, conf, factory, - rpcTimeout, connectionRetryPolicy); + rpcTimeout, connectionRetryPolicy, fallbackToSimpleAuth); return new ProtocolProxy(protocol, (T) Proxy.newProxyInstance( protocol.getClassLoader(), new Class[]{protocol}, invoker), false); } @@ -115,13 +125,16 @@ private static class Invoker implements RpcInvocationHandler { private final Client client; private 
final long clientProtocolVersion; private final String protocolName; + private AtomicBoolean fallbackToSimpleAuth; private Invoker(Class protocol, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, - int rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException { + int rpcTimeout, RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) throws IOException { this(protocol, Client.ConnectionId.getConnectionId( addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf), conf, factory); + this.fallbackToSimpleAuth = fallbackToSimpleAuth; } /** @@ -217,7 +230,8 @@ public Object invoke(Object proxy, Method method, Object[] args) final RpcResponseWrapper val; try { val = (RpcResponseWrapper) client.call(RPC.RpcKind.RPC_PROTOCOL_BUFFER, - new RpcRequestWrapper(rpcRequestHeader, theRequest), remoteId); + new RpcRequestWrapper(rpcRequestHeader, theRequest), remoteId, + fallbackToSimpleAuth); } catch (Throwable e) { if (LOG.isTraceEnabled()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java index 4ae7956c68e52..40f6515e4a04d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java @@ -33,6 +33,7 @@ import java.util.List; import java.util.Map; import java.util.HashMap; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.SocketFactory; @@ -524,6 +525,7 @@ public static T getProxy(Class protocol, * @param conf configuration * @param factory socket factory * @param rpcTimeout max time for each rpc; 0 means no timeout + * @param connectionRetryPolicy retry policy * @return the proxy * @throws IOException if any error occurs */ @@ -535,11 +537,43 @@ public static ProtocolProxy getProtocolProxy(Class protocol, SocketFactory factory, int 
rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException { + return getProtocolProxy(protocol, clientVersion, addr, ticket, + conf, factory, rpcTimeout, connectionRetryPolicy, null); + } + + /** + * Get a protocol proxy that contains a proxy connection to a remote server + * and a set of methods that are supported by the server + * + * @param protocol protocol + * @param clientVersion client's version + * @param addr server address + * @param ticket security ticket + * @param conf configuration + * @param factory socket factory + * @param rpcTimeout max time for each rpc; 0 means no timeout + * @param connectionRetryPolicy retry policy + * @param fallbackToSimpleAuth set to true or false during calls to indicate if + * a secure client falls back to simple auth + * @return the proxy + * @throws IOException if any error occurs + */ + public static ProtocolProxy getProtocolProxy(Class protocol, + long clientVersion, + InetSocketAddress addr, + UserGroupInformation ticket, + Configuration conf, + SocketFactory factory, + int rpcTimeout, + RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) + throws IOException { if (UserGroupInformation.isSecurityEnabled()) { SaslRpcServer.init(conf); } - return getProtocolEngine(protocol,conf).getProxy(protocol, clientVersion, - addr, ticket, conf, factory, rpcTimeout, connectionRetryPolicy); + return getProtocolEngine(protocol, conf).getProxy(protocol, clientVersion, + addr, ticket, conf, factory, rpcTimeout, connectionRetryPolicy, + fallbackToSimpleAuth); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java index a8280bd2edffd..047722e649eef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java @@ -20,6 +20,7 @@ import 
java.io.IOException; import java.net.InetSocketAddress; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.SocketFactory; @@ -43,6 +44,14 @@ ProtocolProxy getProxy(Class protocol, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy) throws IOException; + /** Construct a client-side proxy object. */ + ProtocolProxy getProxy(Class protocol, + long clientVersion, InetSocketAddress addr, + UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout, + RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) throws IOException; + /** * Construct a server for a protocol implementation instance. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 4b2dfe0de1009..c2d9435908aab 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -24,6 +24,7 @@ import java.net.InetSocketAddress; import java.io.*; +import java.util.concurrent.atomic.AtomicBoolean; import javax.net.SocketFactory; @@ -212,14 +213,17 @@ private static class Invoker implements RpcInvocationHandler { private Client.ConnectionId remoteId; private Client client; private boolean isClosed = false; + private final AtomicBoolean fallbackToSimpleAuth; public Invoker(Class protocol, InetSocketAddress address, UserGroupInformation ticket, Configuration conf, SocketFactory factory, - int rpcTimeout) throws IOException { + int rpcTimeout, AtomicBoolean fallbackToSimpleAuth) + throws IOException { this.remoteId = Client.ConnectionId.getConnectionId(address, protocol, ticket, rpcTimeout, conf); this.client = CLIENTS.getClient(conf, factory); + this.fallbackToSimpleAuth = fallbackToSimpleAuth; } @Override @@ -238,7 +242,8 @@ public 
Object invoke(Object proxy, Method method, Object[] args) ObjectWritable value; try { value = (ObjectWritable) - client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), remoteId); + client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), + remoteId, fallbackToSimpleAuth); } finally { if (traceScope != null) traceScope.close(); } @@ -275,11 +280,25 @@ static Client getClient(Configuration conf) { * talking to a server at the named address. * @param */ @Override - @SuppressWarnings("unchecked") public ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy) + throws IOException { + return getProxy(protocol, clientVersion, addr, ticket, conf, factory, + rpcTimeout, connectionRetryPolicy, null); + } + + /** Construct a client-side proxy object that implements the named protocol, + * talking to a server at the named address. 
+ * @param */ + @Override + @SuppressWarnings("unchecked") + public ProtocolProxy getProxy(Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, + Configuration conf, SocketFactory factory, + int rpcTimeout, RetryPolicy connectionRetryPolicy, + AtomicBoolean fallbackToSimpleAuth) throws IOException { if (connectionRetryPolicy != null) { @@ -289,7 +308,7 @@ public ProtocolProxy getProxy(Class protocol, long clientVersion, T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(), new Class[] { protocol }, new Invoker(protocol, addr, ticket, conf, - factory, rpcTimeout)); + factory, rpcTimeout, fallbackToSimpleAuth)); return new ProtocolProxy(protocol, proxy, true); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java index 57014d5781b30..9c9164eaf4369 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics/util/MetricsDynamicMBeanBase.java @@ -160,7 +160,7 @@ else if (attributeName.endsWith(MIN_TIME)) else if (attributeName.endsWith(MAX_TIME)) return or.getMaxTime(); else { - MetricsUtil.LOG.error("Unexpected attrubute suffix"); + MetricsUtil.LOG.error("Unexpected attribute suffix"); throw new AttributeNotFoundException(); } } else { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index 722abd95c4ae2..2107e68895b52 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -83,7 +83,12 @@ enum InitMode { NORMAL, STANDBY } private final Map allSources; private final Map sinks; private final Map allSinks; + + // The callback list is used by register(Callback callback), while + // the callback map is used by register(String name, String desc, T sink) private final List callbacks; + private final Map namedCallbacks; + private final MetricsCollectorImpl collector; private final MetricsRegistry registry = new MetricsRegistry(MS_NAME); @Metric({"Snapshot", "Snapshot stats"}) MutableStat snapshotStat; @@ -119,6 +124,7 @@ public MetricsSystemImpl(String prefix) { sourceConfigs = Maps.newHashMap(); sinkConfigs = Maps.newHashMap(); callbacks = Lists.newArrayList(); + namedCallbacks = Maps.newHashMap(); injectedTags = Lists.newArrayList(); collector = new MetricsCollectorImpl(); if (prefix != null) { @@ -178,11 +184,13 @@ public synchronized void start() { return; } for (Callback cb : callbacks) cb.preStart(); + for (Callback cb : namedCallbacks.values()) cb.preStart(); configure(prefix); startTimer(); monitoring = true; LOG.info(prefix +" metrics system started"); for (Callback cb : callbacks) cb.postStart(); + for (Callback cb : namedCallbacks.values()) cb.postStart(); } @Override @@ -198,6 +206,7 @@ public synchronized void stop() { return; } for (Callback cb : callbacks) cb.preStop(); + for (Callback cb : namedCallbacks.values()) cb.preStop(); LOG.info("Stopping "+ prefix +" metrics system..."); stopTimer(); stopSources(); @@ -206,6 +215,7 @@ public synchronized void stop() { monitoring = false; LOG.info(prefix +" metrics system stopped."); for (Callback cb : callbacks) cb.postStop(); + for (Callback cb : namedCallbacks.values()) cb.postStop(); } @Override public synchronized @@ -224,7 +234,7 @@ T register(String name, String desc, T source) { } // We want to re-register the source to pick up new config when the // metrics system 
restarts. - register(new AbstractCallback() { + register(name, new AbstractCallback() { @Override public void postStart() { registerSource(finalName, finalDesc, s); } @@ -241,6 +251,9 @@ void unregisterSource(String name) { if (allSources.containsKey(name)) { allSources.remove(name); } + if (namedCallbacks.containsKey(name)) { + namedCallbacks.remove(name); + } } synchronized @@ -268,7 +281,7 @@ T register(final String name, final String description, final T sink) { } // We want to re-register the sink to pick up new config // when the metrics system restarts. - register(new AbstractCallback() { + register(name, new AbstractCallback() { @Override public void postStart() { register(name, description, sink); } @@ -289,9 +302,16 @@ synchronized void registerSink(String name, String desc, MetricsSink sink) { @Override public synchronized void register(final Callback callback) { - callbacks.add((Callback) Proxy.newProxyInstance( - callback.getClass().getClassLoader(), new Class[] { Callback.class }, - new InvocationHandler() { + callbacks.add((Callback) getProxyForCallback(callback)); + } + + private synchronized void register(String name, final Callback callback) { + namedCallbacks.put(name, (Callback) getProxyForCallback(callback)); + } + + private Object getProxyForCallback(final Callback callback) { + return Proxy.newProxyInstance(callback.getClass().getClassLoader(), + new Class[] { Callback.class }, new InvocationHandler() { @Override public Object invoke(Object proxy, Method method, Object[] args) throws Throwable { @@ -299,11 +319,11 @@ public Object invoke(Object proxy, Method method, Object[] args) return method.invoke(callback, args); } catch (Exception e) { // These are not considered fatal. 
- LOG.warn("Caught exception in callback "+ method.getName(), e); + LOG.warn("Caught exception in callback " + method.getName(), e); } return null; } - })); + }); } @Override @@ -572,6 +592,7 @@ public synchronized boolean shutdown() { allSources.clear(); allSinks.clear(); callbacks.clear(); + namedCallbacks.clear(); if (mbeanName != null) { MBeans.unregister(mbeanName); mbeanName = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java index 6ee6db769ad1c..5f11367d4914b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java @@ -26,6 +26,7 @@ import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -689,6 +690,12 @@ protected Random getRandom() { return rand; } + @VisibleForTesting + void setRandomSeed(long seed) { + Random rand = getRandom(); + rand.setSeed(seed); + } + /** randomly choose one node from scope * if scope starts with ~, choose one from the all nodes except for the * ones in scope; otherwise, choose one from scope @@ -775,25 +782,35 @@ public int countNumOfAvailableNodes(String scope, scope=scope.substring(1); } scope = NodeBase.normalize(scope); - int count=0; // the number of nodes in both scope & excludedNodes + int excludedCountInScope = 0; // the number of nodes in both scope & excludedNodes + int excludedCountOffScope = 0; // the number of nodes outside scope & excludedNodes netlock.readLock().lock(); try { - for(Node node:excludedNodes) { - if ((NodeBase.getPath(node)+NodeBase.PATH_SEPARATOR_STR). 
- startsWith(scope+NodeBase.PATH_SEPARATOR_STR)) { - count++; + for (Node node : excludedNodes) { + node = getNode(NodeBase.getPath(node)); + if (node == null) { + continue; + } + if ((NodeBase.getPath(node) + NodeBase.PATH_SEPARATOR_STR) + .startsWith(scope + NodeBase.PATH_SEPARATOR_STR)) { + excludedCountInScope++; + } else { + excludedCountOffScope++; } } - Node n=getNode(scope); - int scopeNodeCount=1; + Node n = getNode(scope); + int scopeNodeCount = 0; + if (n != null) { + scopeNodeCount++; + } if (n instanceof InnerNode) { scopeNodeCount=((InnerNode)n).getNumOfLeaves(); } if (isExcluded) { - return clusterMap.getNumOfLeaves()- - scopeNodeCount-excludedNodes.size()+count; + return clusterMap.getNumOfLeaves() - scopeNodeCount + - excludedCountOffScope; } else { - return scopeNodeCount-count; + return scopeNodeCount - excludedCountInScope; } } finally { netlock.readLock().unlock(); @@ -870,21 +887,19 @@ protected int getWeight(Node reader, Node node) { /** * Sort nodes array by network distance to reader. *

- * In a three-level topology, a node can be either local, on the same rack, or - * on a different rack from the reader. Sorting the nodes based on network - * distance from the reader reduces network traffic and improves performance. + * In a three-level topology, a node can be either local, on the same rack, + * or on a different rack from the reader. Sorting the nodes based on network + * distance from the reader reduces network traffic and improves + * performance. *

* As an additional twist, we also randomize the nodes at each network - * distance using the provided random seed. This helps with load balancing - * when there is data skew. - * - * @param reader Node where data will be read - * @param nodes Available replicas with the requested data - * @param seed Used to seed the pseudo-random generator that randomizes the - * set of nodes at each network distance. + * distance. This helps with load balancing when there is data skew. + * + * @param reader Node where data will be read + * @param nodes Available replicas with the requested data + * @param activeLen Number of active nodes at the front of the array */ - public void sortByDistance(Node reader, Node[] nodes, int activeLen, - long seed, boolean randomizeBlockLocationsPerBlock) { + public void sortByDistance(Node reader, Node[] nodes, int activeLen) { /** Sort weights for the nodes array */ int[] weights = new int[activeLen]; for (int i=0; i list: tree.values()) { if (list != null) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java index cc598c0986fb2..13160ebba06f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java @@ -268,19 +268,17 @@ protected int getWeight(Node reader, Node node) { /** * Sort nodes array by their distances to reader. *

- * This is the same as - * {@link NetworkTopology#sortByDistance(Node, Node[], long)} except with a - * four-level network topology which contains the additional network distance - * of a "node group" which is between local and same rack. - * - * @param reader Node where data will be read - * @param nodes Available replicas with the requested data - * @param seed Used to seed the pseudo-random generator that randomizes the - * set of nodes at each network distance. + * This is the same as {@link NetworkTopology#sortByDistance(Node, Node[], + * int)} except with a four-level network topology which contains the + * additional network distance of a "node group" which is between local and + * same rack. + * + * @param reader Node where data will be read + * @param nodes Available replicas with the requested data + * @param activeLen Number of active nodes at the front of the array */ @Override - public void sortByDistance(Node reader, Node[] nodes, int activeLen, - long seed, boolean randomizeBlockLocationsPerBlock) { + public void sortByDistance(Node reader, Node[] nodes, int activeLen) { // If reader is not a datanode (not in NetworkTopology tree), we need to // replace this reader with a sibling leaf node in tree. 
if (reader != null && !this.contains(reader)) { @@ -293,8 +291,7 @@ public void sortByDistance(Node reader, Node[] nodes, int activeLen, return; } } - super.sortByDistance(reader, nodes, activeLen, seed, - randomizeBlockLocationsPerBlock); + super.sortByDistance(reader, nodes, activeLen); } /** InnerNodeWithNodeGroup represents a switch/router of a data center, rack diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index b9e26b545c363..f5e7bc9c0dee0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -127,7 +127,7 @@ public void startThreads() throws IOException { public synchronized void reset() { currentId = 0; allKeys.clear(); - delegationTokenSequenceNumber = 0; + setDelegationTokenSeqNum(0); currentTokens.clear(); } @@ -141,7 +141,7 @@ public synchronized void addKey(DelegationKey key) throws IOException { if (key.getKeyId() > currentId) { currentId = key.getKeyId(); } - allKeys.put(key.getKeyId(), key); + storeDelegationKey(key); } public synchronized DelegationKey[] getAllKeys() { @@ -163,24 +163,108 @@ protected void storeNewMasterKey(DelegationKey key) throws IOException { return; } + // for ZK based secretManager + protected void updateMasterKey(DelegationKey key) throws IOException{ + return; + } + // RM protected void removeStoredMasterKey(DelegationKey key) { return; } // RM - protected void storeNewToken(TokenIdent ident, long renewDate) { + protected void storeNewToken(TokenIdent ident, long renewDate) throws IOException{ return; } + // RM protected void 
removeStoredToken(TokenIdent ident) throws IOException { } // RM - protected void updateStoredToken(TokenIdent ident, long renewDate) { + protected void updateStoredToken(TokenIdent ident, long renewDate) throws IOException { return; } + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected int getDelegationTokenSeqNum() { + return delegationTokenSequenceNumber; + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected int incrementDelegationTokenSeqNum() { + return ++delegationTokenSequenceNumber; + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected void setDelegationTokenSeqNum(int seqNum) { + delegationTokenSequenceNumber = seqNum; + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected DelegationKey getDelegationKey(int keyId) { + return allKeys.get(keyId); + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected void storeDelegationKey(DelegationKey key) throws IOException { + allKeys.put(key.getKeyId(), key); + storeNewMasterKey(key); + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected void updateDelegationKey(DelegationKey key) throws IOException { + allKeys.put(key.getKeyId(), key); + updateMasterKey(key); + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { + return currentTokens.get(ident); + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected void storeToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + currentTokens.put(ident, 
tokenInfo); + storeNewToken(ident, tokenInfo.getRenewDate()); + } + + /** + * For subclasses externalizing the storage, for example Zookeeper + * based implementations + */ + protected void updateToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + currentTokens.put(ident, tokenInfo); + updateStoredToken(ident, tokenInfo.getRenewDate()); + } + /** * This method is intended to be used for recovering persisted delegation * tokens @@ -196,17 +280,18 @@ public synchronized void addPersistedDelegationToken( "Can't add persisted delegation token to a running SecretManager."); } int keyId = identifier.getMasterKeyId(); - DelegationKey dKey = allKeys.get(keyId); + DelegationKey dKey = getDelegationKey(keyId); if (dKey == null) { LOG.warn("No KEY found for persisted identifier " + identifier.toString()); return; } byte[] password = createPassword(identifier.getBytes(), dKey.getKey()); - if (identifier.getSequenceNumber() > this.delegationTokenSequenceNumber) { - this.delegationTokenSequenceNumber = identifier.getSequenceNumber(); + int delegationTokenSeqNum = getDelegationTokenSeqNum(); + if (identifier.getSequenceNumber() > delegationTokenSeqNum) { + setDelegationTokenSeqNum(identifier.getSequenceNumber()); } - if (currentTokens.get(identifier) == null) { - currentTokens.put(identifier, new DelegationTokenInformation(renewDate, + if (getTokenInfo(identifier) == null) { + storeToken(identifier, new DelegationTokenInformation(renewDate, password, getTrackingIdIfEnabled(identifier))); } else { throw new IOException("Same delegation token being added twice."); @@ -234,7 +319,7 @@ private void updateCurrentKey() throws IOException { synchronized (this) { currentId = newKey.getKeyId(); currentKey = newKey; - allKeys.put(currentKey.getKeyId(), currentKey); + storeDelegationKey(currentKey); } } @@ -252,7 +337,7 @@ void rollMasterKey() throws IOException { * updateMasterKey() isn't called at expected interval. Add it back to * allKeys just in case. 
*/ - allKeys.put(currentKey.getKeyId(), currentKey); + updateDelegationKey(currentKey); } updateCurrentKey(); } @@ -276,19 +361,25 @@ private synchronized void removeExpiredKeys() { protected synchronized byte[] createPassword(TokenIdent identifier) { int sequenceNum; long now = Time.now(); - sequenceNum = ++delegationTokenSequenceNumber; + sequenceNum = incrementDelegationTokenSeqNum(); identifier.setIssueDate(now); identifier.setMaxDate(now + tokenMaxLifetime); identifier.setMasterKeyId(currentId); identifier.setSequenceNumber(sequenceNum); LOG.info("Creating password for identifier: " + identifier); byte[] password = createPassword(identifier.getBytes(), currentKey.getKey()); - storeNewToken(identifier, now + tokenRenewInterval); - currentTokens.put(identifier, new DelegationTokenInformation(now - + tokenRenewInterval, password, getTrackingIdIfEnabled(identifier))); + DelegationTokenInformation tokenInfo = new DelegationTokenInformation(now + + tokenRenewInterval, password, getTrackingIdIfEnabled(identifier)); + try { + storeToken(identifier, tokenInfo); + } catch (IOException ioe) { + LOG.error("Could not store token !!", ioe); + } return password; } + + /** * Find the DelegationTokenInformation for the given token id, and verify that * if the token is expired. 
Note that this method should be called with @@ -297,7 +388,7 @@ protected synchronized byte[] createPassword(TokenIdent identifier) { protected DelegationTokenInformation checkToken(TokenIdent identifier) throws InvalidToken { assert Thread.holdsLock(this); - DelegationTokenInformation info = currentTokens.get(identifier); + DelegationTokenInformation info = getTokenInfo(identifier); if (info == null) { throw new InvalidToken("token (" + identifier.toString() + ") can't be found in cache"); @@ -322,7 +413,7 @@ protected String getTrackingIdIfEnabled(TokenIdent ident) { } public synchronized String getTokenTrackingId(TokenIdent identifier) { - DelegationTokenInformation info = currentTokens.get(identifier); + DelegationTokenInformation info = getTokenInfo(identifier); if (info == null) { return null; } @@ -373,7 +464,7 @@ public synchronized long renewToken(Token token, throw new AccessControlException(renewer + " tries to renew a token with renewer " + id.getRenewer()); } - DelegationKey key = allKeys.get(id.getMasterKeyId()); + DelegationKey key = getDelegationKey(id.getMasterKeyId()); if (key == null) { throw new InvalidToken("Unable to find master key for keyId=" + id.getMasterKeyId() @@ -390,11 +481,10 @@ public synchronized long renewToken(Token token, DelegationTokenInformation info = new DelegationTokenInformation(renewTime, password, trackingId); - if (currentTokens.get(id) == null) { + if (getTokenInfo(id) == null) { throw new InvalidToken("Renewal request for unknown token"); } - currentTokens.put(id, info); - updateStoredToken(id, renewTime); + updateToken(id, info); return renewTime; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java new file mode 100644 index 0000000000000..23c7144501752 --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java @@ -0,0 +1,727 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.security.token.delegation; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import javax.security.auth.login.AppConfigurationEntry; + +import org.apache.curator.ensemble.fixed.FixedEnsembleProvider; +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.framework.CuratorFrameworkFactory.Builder; +import org.apache.curator.framework.api.ACLProvider; +import org.apache.curator.framework.imps.DefaultACLProvider; +import org.apache.curator.framework.recipes.cache.PathChildrenCache; +import org.apache.curator.framework.recipes.cache.PathChildrenCache.StartMode; +import 
org.apache.curator.framework.recipes.cache.PathChildrenCacheEvent; +import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener; +import org.apache.curator.framework.recipes.shared.SharedCount; +import org.apache.curator.retry.RetryNTimes; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooDefs.Perms; +import org.apache.zookeeper.client.ZooKeeperSaslClient; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Id; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +/** + * An implementation of {@link AbstractDelegationTokenSecretManager} that + * persists TokenIdentifiers and DelegationKeys in Zookeeper. This class can + * be used by HA (Highly available) services that consists of multiple nodes. + * This class ensures that Identifiers and Keys are replicated to all nodes of + * the service. 
+ */ +@InterfaceAudience.Private +public abstract class ZKDelegationTokenSecretManager + extends AbstractDelegationTokenSecretManager { + + private static final String ZK_CONF_PREFIX = "zk-dt-secret-manager."; + public static final String ZK_DTSM_ZK_NUM_RETRIES = ZK_CONF_PREFIX + + "zkNumRetries"; + public static final String ZK_DTSM_ZK_SESSION_TIMEOUT = ZK_CONF_PREFIX + + "zkSessionTimeout"; + public static final String ZK_DTSM_ZK_CONNECTION_TIMEOUT = ZK_CONF_PREFIX + + "zkConnectionTimeout"; + public static final String ZK_DTSM_ZNODE_WORKING_PATH = ZK_CONF_PREFIX + + "znodeWorkingPath"; + public static final String ZK_DTSM_ZK_AUTH_TYPE = ZK_CONF_PREFIX + + "zkAuthType"; + public static final String ZK_DTSM_ZK_CONNECTION_STRING = ZK_CONF_PREFIX + + "zkConnectionString"; + public static final String ZK_DTSM_ZK_KERBEROS_KEYTAB = ZK_CONF_PREFIX + + "kerberos.keytab"; + public static final String ZK_DTSM_ZK_KERBEROS_PRINCIPAL = ZK_CONF_PREFIX + + "kerberos.principal"; + + public static final int ZK_DTSM_ZK_NUM_RETRIES_DEFAULT = 3; + public static final int ZK_DTSM_ZK_SESSION_TIMEOUT_DEFAULT = 10000; + public static final int ZK_DTSM_ZK_CONNECTION_TIMEOUT_DEFAULT = 10000; + public static final String ZK_DTSM_ZNODE_WORKING_PATH_DEAFULT = "zkdtsm"; + + private static Logger LOG = LoggerFactory + .getLogger(ZKDelegationTokenSecretManager.class); + + private static final String JAAS_LOGIN_ENTRY_NAME = + "ZKDelegationTokenSecretManagerClient"; + + private static final String ZK_DTSM_NAMESPACE = "ZKDTSMRoot"; + private static final String ZK_DTSM_SEQNUM_ROOT = "ZKDTSMSeqNumRoot"; + private static final String ZK_DTSM_TOKENS_ROOT = "ZKDTSMTokensRoot"; + private static final String ZK_DTSM_MASTER_KEY_ROOT = "ZKDTSMMasterKeyRoot"; + + private static final String DELEGATION_KEY_PREFIX = "DK_"; + private static final String DELEGATION_TOKEN_PREFIX = "DT_"; + + private static final ThreadLocal CURATOR_TL = + new ThreadLocal(); + + public static void setCurator(CuratorFramework 
curator) { + CURATOR_TL.set(curator); + } + + private final boolean isExternalClient; + private final CuratorFramework zkClient; + private SharedCount seqCounter; + private PathChildrenCache keyCache; + private PathChildrenCache tokenCache; + private ExecutorService listenerThreadPool; + + public ZKDelegationTokenSecretManager(Configuration conf) { + super(conf.getLong(DelegationTokenManager.UPDATE_INTERVAL, + DelegationTokenManager.UPDATE_INTERVAL_DEFAULT) * 1000, + conf.getLong(DelegationTokenManager.MAX_LIFETIME, + DelegationTokenManager.MAX_LIFETIME_DEFAULT) * 1000, + conf.getLong(DelegationTokenManager.RENEW_INTERVAL, + DelegationTokenManager.RENEW_INTERVAL_DEFAULT * 1000), + conf.getLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, + DelegationTokenManager.REMOVAL_SCAN_INTERVAL_DEFAULT) * 1000); + if (CURATOR_TL.get() != null) { + zkClient = CURATOR_TL.get(); + isExternalClient = true; + } else { + String connString = conf.get(ZK_DTSM_ZK_CONNECTION_STRING); + Preconditions.checkNotNull(connString, + "Zookeeper connection string cannot be null"); + String authType = conf.get(ZK_DTSM_ZK_AUTH_TYPE); + + // AuthType has to be explicitly set to 'none' or 'sasl' + Preconditions.checkNotNull(authType, "Zookeeper authType cannot be null !!"); + Preconditions.checkArgument( + authType.equals("sasl") || authType.equals("none"), + "Zookeeper authType must be one of [none, sasl]"); + + Builder builder = null; + try { + ACLProvider aclProvider = null; + if (authType.equals("sasl")) { + LOG.info("Connecting to ZooKeeper with SASL/Kerberos" + + "and using 'sasl' ACLs"); + String principal = setJaasConfiguration(conf); + System.setProperty(ZooKeeperSaslClient.LOGIN_CONTEXT_NAME_KEY, + JAAS_LOGIN_ENTRY_NAME); + System.setProperty("zookeeper.authProvider.1", + "org.apache.zookeeper.server.auth.SASLAuthenticationProvider"); + aclProvider = new SASLOwnerACLProvider(principal); + } else { // "none" + LOG.info("Connecting to ZooKeeper without authentication"); + aclProvider = new 
DefaultACLProvider(); // open to everyone + } + int sessionT = + conf.getInt(ZK_DTSM_ZK_SESSION_TIMEOUT, + ZK_DTSM_ZK_SESSION_TIMEOUT_DEFAULT); + int numRetries = + conf.getInt(ZK_DTSM_ZK_NUM_RETRIES, ZK_DTSM_ZK_NUM_RETRIES_DEFAULT); + builder = + CuratorFrameworkFactory + .builder() + .aclProvider(aclProvider) + .namespace( + conf.get(ZK_DTSM_ZNODE_WORKING_PATH, + ZK_DTSM_ZNODE_WORKING_PATH_DEAFULT) + + "/" + + ZK_DTSM_NAMESPACE + ) + .sessionTimeoutMs(sessionT) + .connectionTimeoutMs( + conf.getInt(ZK_DTSM_ZK_CONNECTION_TIMEOUT, + ZK_DTSM_ZK_CONNECTION_TIMEOUT_DEFAULT) + ) + .retryPolicy( + new RetryNTimes(numRetries, sessionT / numRetries)); + } catch (Exception ex) { + throw new RuntimeException("Could not Load ZK acls or auth"); + } + zkClient = builder.ensembleProvider(new FixedEnsembleProvider(connString)) + .build(); + isExternalClient = false; + } + listenerThreadPool = Executors.newFixedThreadPool(2); + } + + private String setJaasConfiguration(Configuration config) throws Exception { + String keytabFile = + config.get(ZK_DTSM_ZK_KERBEROS_KEYTAB, "").trim(); + if (keytabFile == null || keytabFile.length() == 0) { + throw new IllegalArgumentException(ZK_DTSM_ZK_KERBEROS_KEYTAB + + " must be specified"); + } + String principal = + config.get(ZK_DTSM_ZK_KERBEROS_PRINCIPAL, "").trim(); + if (principal == null || principal.length() == 0) { + throw new IllegalArgumentException(ZK_DTSM_ZK_KERBEROS_PRINCIPAL + + " must be specified"); + } + + JaasConfiguration jConf = + new JaasConfiguration(JAAS_LOGIN_ENTRY_NAME, principal, keytabFile); + javax.security.auth.login.Configuration.setConfiguration(jConf); + return principal.split("[/@]")[0]; + } + + /** + * Creates a programmatic version of a jaas.conf file. This can be used + * instead of writing a jaas.conf file and setting the system property, + * "java.security.auth.login.config", to point to that file. It is meant to be + * used for connecting to ZooKeeper. 
+ */ + @InterfaceAudience.Private + public static class JaasConfiguration extends + javax.security.auth.login.Configuration { + + private static AppConfigurationEntry[] entry; + private String entryName; + + /** + * Add an entry to the jaas configuration with the passed in name, + * principal, and keytab. The other necessary options will be set for you. + * + * @param entryName + * The name of the entry (e.g. "Client") + * @param principal + * The principal of the user + * @param keytab + * The location of the keytab + */ + public JaasConfiguration(String entryName, String principal, String keytab) { + this.entryName = entryName; + Map options = new HashMap(); + options.put("keyTab", keytab); + options.put("principal", principal); + options.put("useKeyTab", "true"); + options.put("storeKey", "true"); + options.put("useTicketCache", "false"); + options.put("refreshKrb5Config", "true"); + String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG"); + if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) { + options.put("debug", "true"); + } + entry = new AppConfigurationEntry[] { + new AppConfigurationEntry(getKrb5LoginModuleName(), + AppConfigurationEntry.LoginModuleControlFlag.REQUIRED, + options) }; + } + + @Override + public AppConfigurationEntry[] getAppConfigurationEntry(String name) { + return (entryName.equals(name)) ? 
entry : null; + } + + private String getKrb5LoginModuleName() { + String krb5LoginModuleName; + if (System.getProperty("java.vendor").contains("IBM")) { + krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule"; + } else { + krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule"; + } + return krb5LoginModuleName; + } + } + + @Override + public void startThreads() throws IOException { + if (!isExternalClient) { + try { + zkClient.start(); + } catch (Exception e) { + throw new IOException("Could not start Curator Framework", e); + } + } + try { + seqCounter = new SharedCount(zkClient, ZK_DTSM_SEQNUM_ROOT, 0); + if (seqCounter != null) { + seqCounter.start(); + } + } catch (Exception e) { + throw new IOException("Could not start Sequence Counter", e); + } + try { + createPersistentNode(ZK_DTSM_MASTER_KEY_ROOT); + createPersistentNode(ZK_DTSM_TOKENS_ROOT); + } catch (Exception e) { + throw new RuntimeException("Could not create ZK paths"); + } + try { + keyCache = new PathChildrenCache(zkClient, ZK_DTSM_MASTER_KEY_ROOT, true); + if (keyCache != null) { + keyCache.start(StartMode.POST_INITIALIZED_EVENT); + keyCache.getListenable().addListener(new PathChildrenCacheListener() { + @Override + public void childEvent(CuratorFramework client, + PathChildrenCacheEvent event) + throws Exception { + switch (event.getType()) { + case CHILD_ADDED: + processKeyAddOrUpdate(event.getData().getData()); + break; + case CHILD_UPDATED: + processKeyAddOrUpdate(event.getData().getData()); + break; + case CHILD_REMOVED: + processKeyRemoved(event.getData().getPath()); + break; + default: + break; + } + } + }, listenerThreadPool); + } + } catch (Exception e) { + throw new IOException("Could not start PathChildrenCache for keys", e); + } + try { + tokenCache = new PathChildrenCache(zkClient, ZK_DTSM_TOKENS_ROOT, true); + if (tokenCache != null) { + tokenCache.start(StartMode.POST_INITIALIZED_EVENT); + tokenCache.getListenable().addListener(new 
PathChildrenCacheListener() { + + @Override + public void childEvent(CuratorFramework client, + PathChildrenCacheEvent event) throws Exception { + switch (event.getType()) { + case CHILD_ADDED: + processTokenAddOrUpdate(event.getData().getData()); + break; + case CHILD_UPDATED: + processTokenAddOrUpdate(event.getData().getData()); + break; + case CHILD_REMOVED: + processTokenRemoved(event.getData().getData()); + break; + default: + break; + } + } + }, listenerThreadPool); + } + } catch (Exception e) { + throw new IOException("Could not start PathChildrenCache for tokens", e); + } + super.startThreads(); + } + + private void processKeyAddOrUpdate(byte[] data) throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(data); + DataInputStream din = new DataInputStream(bin); + DelegationKey key = new DelegationKey(); + key.readFields(din); + allKeys.put(key.getKeyId(), key); + } + + private void processKeyRemoved(String path) { + int i = path.lastIndexOf('/'); + if (i > 0) { + String tokSeg = path.substring(i + 1); + int j = tokSeg.indexOf('_'); + if (j > 0) { + int keyId = Integer.parseInt(tokSeg.substring(j + 1)); + allKeys.remove(keyId); + } + } + } + + private void processTokenAddOrUpdate(byte[] data) throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(data); + DataInputStream din = new DataInputStream(bin); + TokenIdent ident = createIdentifier(); + ident.readFields(din); + long renewDate = din.readLong(); + int pwdLen = din.readInt(); + byte[] password = new byte[pwdLen]; + int numRead = din.read(password, 0, pwdLen); + if (numRead > -1) { + DelegationTokenInformation tokenInfo = + new DelegationTokenInformation(renewDate, password); + currentTokens.put(ident, tokenInfo); + } + } + + private void processTokenRemoved(byte[] data) throws IOException { + ByteArrayInputStream bin = new ByteArrayInputStream(data); + DataInputStream din = new DataInputStream(bin); + TokenIdent ident = createIdentifier(); + ident.readFields(din); 
+ currentTokens.remove(ident); + } + + @Override + public void stopThreads() { + try { + if (!isExternalClient && (zkClient != null)) { + zkClient.close(); + } + if (seqCounter != null) { + seqCounter.close(); + } + if (keyCache != null) { + keyCache.close(); + } + if (tokenCache != null) { + tokenCache.close(); + } + } catch (Exception e) { + LOG.error("Could not stop Curator Framework", e); + // Ignore + } + super.stopThreads(); + } + + private void createPersistentNode(String nodePath) throws Exception { + try { + zkClient.create().withMode(CreateMode.PERSISTENT).forPath(nodePath); + } catch (KeeperException.NodeExistsException ne) { + LOG.debug(nodePath + " znode already exists !!"); + } catch (Exception e) { + throw new IOException(nodePath + " znode could not be created !!", e); + } + } + + @Override + protected int getDelegationTokenSeqNum() { + return seqCounter.getCount(); + } + + @Override + protected int incrementDelegationTokenSeqNum() { + try { + while (!seqCounter.trySetCount(seqCounter.getCount() + 1)) { + } + } catch (Exception e) { + throw new RuntimeException("Could not increment shared counter !!", e); + } + return seqCounter.getCount(); + } + + @Override + protected void setDelegationTokenSeqNum(int seqNum) { + delegationTokenSequenceNumber = seqNum; + } + + @Override + protected DelegationKey getDelegationKey(int keyId) { + // First check if its I already have this key + DelegationKey key = allKeys.get(keyId); + // Then query ZK + if (key == null) { + try { + key = getKeyFromZK(keyId); + if (key != null) { + allKeys.put(keyId, key); + } + } catch (IOException e) { + LOG.error("Error retrieving key [" + keyId + "] from ZK", e); + } + } + return key; + } + + private DelegationKey getKeyFromZK(int keyId) throws IOException { + String nodePath = + getNodePath(ZK_DTSM_MASTER_KEY_ROOT, DELEGATION_KEY_PREFIX + keyId); + try { + byte[] data = zkClient.getData().forPath(nodePath); + if ((data == null) || (data.length == 0)) { + return null; + } + 
ByteArrayInputStream bin = new ByteArrayInputStream(data); + DataInputStream din = new DataInputStream(bin); + DelegationKey key = new DelegationKey(); + key.readFields(din); + return key; + } catch (KeeperException.NoNodeException e) { + LOG.error("No node in path [" + nodePath + "]"); + } catch (Exception ex) { + throw new IOException(ex); + } + return null; + } + + @Override + protected DelegationTokenInformation getTokenInfo(TokenIdent ident) { + // First check if I have this.. + DelegationTokenInformation tokenInfo = currentTokens.get(ident); + // Then query ZK + if (tokenInfo == null) { + try { + tokenInfo = getTokenInfoFromZK(ident); + if (tokenInfo != null) { + currentTokens.put(ident, tokenInfo); + } + } catch (IOException e) { + LOG.error("Error retrieving tokenInfo [" + ident.getSequenceNumber() + + "] from ZK", e); + } + } + return tokenInfo; + } + + private DelegationTokenInformation getTokenInfoFromZK(TokenIdent ident) + throws IOException { + String nodePath = + getNodePath(ZK_DTSM_TOKENS_ROOT, + DELEGATION_TOKEN_PREFIX + ident.getSequenceNumber()); + try { + byte[] data = zkClient.getData().forPath(nodePath); + if ((data == null) || (data.length == 0)) { + return null; + } + ByteArrayInputStream bin = new ByteArrayInputStream(data); + DataInputStream din = new DataInputStream(bin); + createIdentifier().readFields(din); + long renewDate = din.readLong(); + int pwdLen = din.readInt(); + byte[] password = new byte[pwdLen]; + int numRead = din.read(password, 0, pwdLen); + if (numRead > -1) { + DelegationTokenInformation tokenInfo = + new DelegationTokenInformation(renewDate, password); + return tokenInfo; + } + } catch (KeeperException.NoNodeException e) { + LOG.error("No node in path [" + nodePath + "]"); + } catch (Exception ex) { + throw new IOException(ex); + } + return null; + } + + @Override + protected void storeDelegationKey(DelegationKey key) throws IOException { + allKeys.put(key.getKeyId(), key); + addOrUpdateDelegationKey(key, false); + } + 
+ @Override + protected void updateDelegationKey(DelegationKey key) throws IOException { + allKeys.put(key.getKeyId(), key); + addOrUpdateDelegationKey(key, true); + } + + private void addOrUpdateDelegationKey(DelegationKey key, boolean isUpdate) + throws IOException { + String nodeCreatePath = + getNodePath(ZK_DTSM_MASTER_KEY_ROOT, + DELEGATION_KEY_PREFIX + key.getKeyId()); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + DataOutputStream fsOut = new DataOutputStream(os); + if (LOG.isDebugEnabled()) { + LOG.debug("Storing ZKDTSMDelegationKey_" + key.getKeyId()); + } + key.write(fsOut); + try { + if (zkClient.checkExists().forPath(nodeCreatePath) != null) { + zkClient.setData().forPath(nodeCreatePath, os.toByteArray()) + .setVersion(-1); + if (!isUpdate) { + LOG.debug("Key with path [" + nodeCreatePath + + "] already exists.. Updating !!"); + } + } else { + zkClient.create().withMode(CreateMode.PERSISTENT) + .forPath(nodeCreatePath, os.toByteArray()); + if (isUpdate) { + LOG.debug("Updating non existent Key path [" + nodeCreatePath + + "].. 
Adding new !!"); + } + } + } catch (KeeperException.NodeExistsException ne) { + LOG.debug(nodeCreatePath + " znode already exists !!"); + } catch (Exception ex) { + throw new IOException(ex); + } finally { + os.close(); + } + } + + @Override + protected void removeStoredMasterKey(DelegationKey key) { + String nodeRemovePath = + getNodePath(ZK_DTSM_MASTER_KEY_ROOT, + DELEGATION_KEY_PREFIX + key.getKeyId()); + if (LOG.isDebugEnabled()) { + LOG.debug("Removing ZKDTSMDelegationKey_" + key.getKeyId()); + } + try { + if (zkClient.checkExists().forPath(nodeRemovePath) != null) { + zkClient.delete().forPath(nodeRemovePath); + } else { + LOG.debug("Attempted to delete a non-existing znode " + nodeRemovePath); + } + } catch (Exception e) { + LOG.debug(nodeRemovePath + " znode could not be removed!!"); + } + } + + @Override + protected void storeToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + currentTokens.put(ident, tokenInfo); + try { + addOrUpdateToken(ident, tokenInfo, false); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + protected void updateToken(TokenIdent ident, + DelegationTokenInformation tokenInfo) throws IOException { + currentTokens.put(ident, tokenInfo); + String nodeRemovePath = + getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + + ident.getSequenceNumber()); + try { + if (zkClient.checkExists().forPath(nodeRemovePath) == null) { + addOrUpdateToken(ident, tokenInfo, false); + LOG.debug("Attempted to update a non-existing znode " + nodeRemovePath); + } else { + addOrUpdateToken(ident, tokenInfo, true); + } + } catch (Exception e) { + throw new RuntimeException("Could not update Stored Token ZKDTSMDelegationToken_" + + ident.getSequenceNumber(), e); + } + } + + @Override + protected void removeStoredToken(TokenIdent ident) + throws IOException { + String nodeRemovePath = + getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + + ident.getSequenceNumber()); + if 
(LOG.isDebugEnabled()) { + LOG.debug("Removing ZKDTSMDelegationToken_" + + ident.getSequenceNumber()); + } + try { + if (zkClient.checkExists().forPath(nodeRemovePath) == null) { + LOG.debug("Attempted to remove a non-existing znode " + nodeRemovePath); + } else { + zkClient.delete().forPath(nodeRemovePath); + } + } catch (Exception e) { + throw new RuntimeException( + "Could not remove Stored Token ZKDTSMDelegationToken_" + + ident.getSequenceNumber(), e); + } + } + + private void addOrUpdateToken(TokenIdent ident, + DelegationTokenInformation info, boolean isUpdate) throws Exception { + String nodeCreatePath = + getNodePath(ZK_DTSM_TOKENS_ROOT, DELEGATION_TOKEN_PREFIX + + ident.getSequenceNumber()); + ByteArrayOutputStream tokenOs = new ByteArrayOutputStream(); + DataOutputStream tokenOut = new DataOutputStream(tokenOs); + ByteArrayOutputStream seqOs = new ByteArrayOutputStream(); + + try { + ident.write(tokenOut); + tokenOut.writeLong(info.getRenewDate()); + tokenOut.writeInt(info.getPassword().length); + tokenOut.write(info.getPassword()); + if (LOG.isDebugEnabled()) { + LOG.debug((isUpdate ? "Updating " : "Storing ") + + "ZKDTSMDelegationToken_" + + ident.getSequenceNumber()); + } + if (isUpdate) { + zkClient.setData().forPath(nodeCreatePath, tokenOs.toByteArray()) + .setVersion(-1); + } else { + zkClient.create().withMode(CreateMode.PERSISTENT) + .forPath(nodeCreatePath, tokenOs.toByteArray()); + } + } finally { + seqOs.close(); + } + } + + /** + * Simple implementation of an {@link ACLProvider} that simply returns an ACL + * that gives all permissions only to a single principal. 
+ */ + private static class SASLOwnerACLProvider implements ACLProvider { + + private final List saslACL; + + private SASLOwnerACLProvider(String principal) { + this.saslACL = Collections.singletonList( + new ACL(Perms.ALL, new Id("sasl", principal))); + } + + @Override + public List getDefaultAcl() { + return saslACL; + } + + @Override + public List getAclForPath(String path) { + return saslACL; + } + } + + @VisibleForTesting + @Private + @Unstable + static String getNodePath(String root, String nodeName) { + return (root + "/" + nodeName); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java index 64a562254b79a..aa9ec9948d454 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationFilter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.security.token.delegation.web; import com.google.common.annotations.VisibleForTesting; +import org.apache.curator.framework.CuratorFramework; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -28,9 +29,11 @@ import org.apache.hadoop.security.authentication.server.AuthenticationToken; import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler; import org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler; +import org.apache.hadoop.security.authentication.util.ZKSignerSecretProvider; import org.apache.hadoop.security.authorize.AuthorizationException; import 
org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager; import org.apache.hadoop.util.HttpExceptionUtils; import org.apache.http.NameValuePair; import org.apache.http.client.utils.URLEncodedUtils; @@ -153,7 +156,14 @@ protected Configuration getProxyuserConfiguration(FilterConfig filterConfig) @Override public void init(FilterConfig filterConfig) throws ServletException { + // A single CuratorFramework should be used for a ZK cluster. + // If the ZKSignerSecretProvider has already created it, it has to + // be set here... to be used by the ZKDelegationTokenSecretManager + ZKDelegationTokenSecretManager.setCurator((CuratorFramework) + filterConfig.getServletContext().getAttribute(ZKSignerSecretProvider. + ZOOKEEPER_SIGNER_SECRET_PROVIDER_CURATOR_CLIENT_ATTRIBUTE)); super.init(filterConfig); + ZKDelegationTokenSecretManager.setCurator(null); AuthenticationHandler handler = getAuthenticationHandler(); AbstractDelegationTokenSecretManager dtSecretManager = (AbstractDelegationTokenSecretManager) filterConfig.getServletContext(). 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java index f41f892caa059..5a31d6dc29495 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java @@ -78,19 +78,6 @@ public abstract class DelegationTokenAuthenticationHandler public static final String TOKEN_KIND = PREFIX + "token-kind"; - public static final String UPDATE_INTERVAL = PREFIX + "update-interval.sec"; - public static final long UPDATE_INTERVAL_DEFAULT = 24 * 60 * 60; - - public static final String MAX_LIFETIME = PREFIX + "max-lifetime.sec"; - public static final long MAX_LIFETIME_DEFAULT = 7 * 24 * 60 * 60; - - public static final String RENEW_INTERVAL = PREFIX + "renew-interval.sec"; - public static final long RENEW_INTERVAL_DEFAULT = 24 * 60 * 60; - - public static final String REMOVAL_SCAN_INTERVAL = PREFIX + - "removal-scan-interval.sec"; - public static final long REMOVAL_SCAN_INTERVAL_DEFAULT = 60 * 60; - private static final Set DELEGATION_TOKEN_OPS = new HashSet(); static final String DELEGATION_TOKEN_UGI_ATTRIBUTE = @@ -142,7 +129,6 @@ public void setExternalDelegationTokenSecretManager( @VisibleForTesting @SuppressWarnings("unchecked") public void initTokenManager(Properties config) { - String configPrefix = authHandler.getType() + "."; Configuration conf = new Configuration(false); for (Map.Entry entry : config.entrySet()) { conf.set((String) entry.getKey(), (String) entry.getValue()); @@ -153,17 +139,7 @@ public void initTokenManager(Properties config) { "The configuration does not define the token kind"); } tokenKind 
= tokenKind.trim(); - long updateInterval = conf.getLong(configPrefix + UPDATE_INTERVAL, - UPDATE_INTERVAL_DEFAULT); - long maxLifeTime = conf.getLong(configPrefix + MAX_LIFETIME, - MAX_LIFETIME_DEFAULT); - long renewInterval = conf.getLong(configPrefix + RENEW_INTERVAL, - RENEW_INTERVAL_DEFAULT); - long removalScanInterval = conf.getLong( - configPrefix + REMOVAL_SCAN_INTERVAL, REMOVAL_SCAN_INTERVAL_DEFAULT); - tokenManager = new DelegationTokenManager(new Text(tokenKind), - updateInterval * 1000, maxLifeTime * 1000, renewInterval * 1000, - removalScanInterval * 1000); + tokenManager = new DelegationTokenManager(conf, new Text(tokenKind)); tokenManager.init(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java index 2e6b46e413660..dbde0a29f087e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenManager.java @@ -17,16 +17,20 @@ */ package org.apache.hadoop.security.token.delegation.web; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; +import org.apache.hadoop.security.token.delegation.ZKDelegationTokenSecretManager; -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import 
java.io.IOException; +import com.google.common.annotations.VisibleForTesting; /** * Delegation Token Manager used by the @@ -35,20 +39,36 @@ */ @InterfaceAudience.Private @InterfaceStability.Evolving -class DelegationTokenManager { +public class DelegationTokenManager { + + public static final String ENABLE_ZK_KEY = "zk-dt-secret-manager.enable"; + + public static final String PREFIX = "delegation-token."; + + public static final String UPDATE_INTERVAL = PREFIX + "update-interval.sec"; + public static final long UPDATE_INTERVAL_DEFAULT = 24 * 60 * 60; + + public static final String MAX_LIFETIME = PREFIX + "max-lifetime.sec"; + public static final long MAX_LIFETIME_DEFAULT = 7 * 24 * 60 * 60; + + public static final String RENEW_INTERVAL = PREFIX + "renew-interval.sec"; + public static final long RENEW_INTERVAL_DEFAULT = 24 * 60 * 60; + + public static final String REMOVAL_SCAN_INTERVAL = PREFIX + + "removal-scan-interval.sec"; + public static final long REMOVAL_SCAN_INTERVAL_DEFAULT = 60 * 60; private static class DelegationTokenSecretManager extends AbstractDelegationTokenSecretManager { private Text tokenKind; - public DelegationTokenSecretManager(Text tokenKind, - long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, - long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval) { - super(delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval); + public DelegationTokenSecretManager(Configuration conf, Text tokenKind) { + super(conf.getLong(UPDATE_INTERVAL, UPDATE_INTERVAL_DEFAULT) * 1000, + conf.getLong(MAX_LIFETIME, MAX_LIFETIME_DEFAULT) * 1000, + conf.getLong(RENEW_INTERVAL, RENEW_INTERVAL_DEFAULT) * 1000, + conf.getLong(REMOVAL_SCAN_INTERVAL, + REMOVAL_SCAN_INTERVAL_DEFAULT) * 1000); this.tokenKind = tokenKind; } @@ -56,21 +76,34 @@ public DelegationTokenSecretManager(Text tokenKind, public DelegationTokenIdentifier createIdentifier() { return new 
DelegationTokenIdentifier(tokenKind); } + } + + private static class ZKSecretManager + extends ZKDelegationTokenSecretManager { + + private Text tokenKind; + + public ZKSecretManager(Configuration conf, Text tokenKind) { + super(conf); + this.tokenKind = tokenKind; + } + @Override + public DelegationTokenIdentifier createIdentifier() { + return new DelegationTokenIdentifier(tokenKind); + } } private AbstractDelegationTokenSecretManager secretManager = null; private boolean managedSecretManager; private Text tokenKind; - public DelegationTokenManager(Text tokenKind, - long delegationKeyUpdateInterval, - long delegationTokenMaxLifetime, - long delegationTokenRenewInterval, - long delegationTokenRemoverScanInterval) { - this.secretManager = new DelegationTokenSecretManager(tokenKind, - delegationKeyUpdateInterval, delegationTokenMaxLifetime, - delegationTokenRenewInterval, delegationTokenRemoverScanInterval); + public DelegationTokenManager(Configuration conf, Text tokenKind) { + if (conf.getBoolean(ENABLE_ZK_KEY, false)) { + this.secretManager = new ZKSecretManager(conf, tokenKind); + } else { + this.secretManager = new DelegationTokenSecretManager(conf, tokenKind); + } this.tokenKind = tokenKind; managedSecretManager = true; } @@ -150,4 +183,9 @@ public UserGroupInformation verifyToken(Token return id.getUser(); } + @VisibleForTesting + @SuppressWarnings("rawtypes") + public AbstractDelegationTokenSecretManager getDelegationTokenSecretManager() { + return secretManager; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java index f2ee446b4ab94..6b27ae5397da7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DiskChecker.java @@ -102,7 +102,7 @@ public static void checkDirs(File dir) throws 
DiskErrorException { */ public static void checkDir(File dir) throws DiskErrorException { if (!mkdirsWithExistsCheck(dir)) { - throw new DiskErrorException("Can not create directory: " + throw new DiskErrorException("Cannot create directory: " + dir.toString()); } checkDirAccess(dir); diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 70796ccde126e..e59fa1b16389b 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -64,6 +64,33 @@ all operations on a valid FileSystem MUST result in a new FileSystem that is als def isSymlink(FS, p) = p in symlinks(FS) +### `boolean inEncryptionZone(Path p)` + +Return True if the data for p is encrypted. The nature of the encryption and the +mechanism for creating an encryption zone are implementation details not covered +in this specification. No guarantees are made about the quality of the +encryption. The metadata is not encrypted. + +#### Preconditions + + if not exists(FS, p) : raise FileNotFoundException + +#### Postconditions + +#### Invariants + +All files and directories under a directory in an encryption zone are also in an +encryption zone + + forall d in directories(FS): inEncryptionZone(FS, d) implies + forall c in children(FS, d) where (isFile(FS, c) or isDir(FS, c)) : + inEncryptionZone(FS, c) + +For all files in an encrypted zone, the data is encrypted, but the encryption +type and specification are not defined. 
+ + forall f in files(FS) where inEncryptionZone(FS, f): + isEncrypted(data(f)) ### `FileStatus getFileStatus(Path p)` @@ -88,6 +115,10 @@ Get the status of a path stat.length = 0 stat.isdir = False stat.symlink = FS.Symlinks[p] + if inEncryptionZone(FS, p) : + stat.isEncrypted = True + else + stat.isEncrypted = False ### `Path getHomeDirectory()` diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java index f5acc73b147cd..86bb64d882c06 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java @@ -469,6 +469,7 @@ private void byteBufferReadCheck(InputStream in, ByteBuffer buf, int bufPos) throws Exception { buf.position(bufPos); int n = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(bufPos + n, buf.position()); byte[] readData = new byte[n]; buf.rewind(); buf.position(bufPos); @@ -568,6 +569,7 @@ public void testCombinedOp() throws Exception { // Read forward len1 ByteBuffer buf = ByteBuffer.allocate(len1); int nRead = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(nRead, buf.position()); readData = new byte[nRead]; buf.rewind(); buf.get(readData); @@ -575,9 +577,10 @@ public void testCombinedOp() throws Exception { System.arraycopy(data, (int)pos, expectedData, 0, nRead); Assert.assertArrayEquals(readData, expectedData); - // Pos should be len1 + 2 * len2 + nRead + long lastPos = pos; + // Pos should be lastPos + nRead pos = ((Seekable) in).getPos(); - Assert.assertEquals(len1 + 2 * len2 + nRead, pos); + Assert.assertEquals(lastPos + nRead, pos); // Pos: 1/3 dataLen positionedReadCheck(in , dataLen / 3); @@ -589,13 +592,15 @@ public void testCombinedOp() throws Exception { System.arraycopy(data, (int)pos, expectedData, 
0, len1); Assert.assertArrayEquals(readData, expectedData); - // Pos should be 2 * len1 + 2 * len2 + nRead + lastPos = pos; + // Pos should be lastPos + len1 pos = ((Seekable) in).getPos(); - Assert.assertEquals(2 * len1 + 2 * len2 + nRead, pos); + Assert.assertEquals(lastPos + len1, pos); // Read forward len1 buf = ByteBuffer.allocate(len1); nRead = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(nRead, buf.position()); readData = new byte[nRead]; buf.rewind(); buf.get(readData); @@ -603,6 +608,11 @@ public void testCombinedOp() throws Exception { System.arraycopy(data, (int)pos, expectedData, 0, nRead); Assert.assertArrayEquals(readData, expectedData); + lastPos = pos; + // Pos should be lastPos + nRead + pos = ((Seekable) in).getPos(); + Assert.assertEquals(lastPos + nRead, pos); + // ByteBuffer read after EOF ((Seekable) in).seek(dataLen); buf.clear(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java index 65ebfb194666a..cbbb27e91eb96 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java @@ -21,6 +21,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.junit.Test; @@ -30,6 +31,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.rm; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; /** @@ 
-65,6 +67,16 @@ public void testOpenReadZeroByteFile() throws Throwable { assertMinusOne("initial byte read", result); } + @Test + public void testFsIsEncrypted() throws Exception { + describe("create an empty file and call FileStatus.isEncrypted()"); + final Path path = path("file"); + createFile(getFileSystem(), path, false, new byte[0]); + final FileStatus stat = getFileSystem().getFileStatus(path); + assertFalse("Expecting false for stat.isEncrypted()", + stat.isEncrypted()); + } + @Test public void testOpenReadDir() throws Throwable { describe("create & read a directory"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index f0e389ff5de94..c1b1bfb902b15 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -276,12 +276,22 @@ private static interface StoppedProtocol { */ private static class StoppedRpcEngine implements RpcEngine { - @SuppressWarnings("unchecked") @Override public ProtocolProxy getProxy(Class protocol, long clientVersion, InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, SocketFactory factory, int rpcTimeout, RetryPolicy connectionRetryPolicy ) throws IOException { + return getProxy(protocol, clientVersion, addr, ticket, conf, factory, + rpcTimeout, connectionRetryPolicy, null); + } + + @SuppressWarnings("unchecked") + @Override + public ProtocolProxy getProxy(Class protocol, long clientVersion, + InetSocketAddress addr, UserGroupInformation ticket, Configuration conf, + SocketFactory factory, int rpcTimeout, + RetryPolicy connectionRetryPolicy, AtomicBoolean fallbackToSimpleAuth + ) throws IOException { T proxy = (T) Proxy.newProxyInstance(protocol.getClassLoader(), new Class[] { protocol }, new StoppedInvocationHandler()); return new 
ProtocolProxy(protocol, proxy, false); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java new file mode 100644 index 0000000000000..3ab663f3dfa22 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestClusterTopology.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.net; + +import java.util.ArrayList; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +public class TestClusterTopology extends Assert { + + public static class NodeElement implements Node { + private String location; + private String name; + private Node parent; + private int level; + + public NodeElement(String name) { + this.name = name; + } + + @Override + public String getNetworkLocation() { + return location; + } + + @Override + public void setNetworkLocation(String location) { + this.location = location; + } + + @Override + public String getName() { + return name; + } + + @Override + public Node getParent() { + return parent; + } + + @Override + public void setParent(Node parent) { + this.parent = parent; + } + + @Override + public int getLevel() { + return level; + } + + @Override + public void setLevel(int i) { + this.level = i; + } + + } + + /** + * Test the count of nodes with exclude list + */ + @Test + public void testCountNumNodes() throws Exception { + // create the topology + NetworkTopology cluster = new NetworkTopology(); + cluster.add(getNewNode("node1", "/d1/r1")); + NodeElement node2 = getNewNode("node2", "/d1/r2"); + cluster.add(node2); + cluster.add(getNewNode("node3", "/d1/r3")); + NodeElement node3 = getNewNode("node4", "/d1/r4"); + cluster.add(node3); + // create exclude list + List excludedNodes = new ArrayList(); + + assertEquals("4 nodes should be available", 4, + cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes)); + NodeElement deadNode = getNewNode("node5", "/d1/r2"); + excludedNodes.add(deadNode); + assertEquals("4 nodes should be available with extra excluded Node", 4, + cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes)); + // add one existing node to exclude list + excludedNodes.add(node3); + assertEquals("excluded nodes with ROOT scope should be considered", 3, + cluster.countNumOfAvailableNodes(NodeBase.ROOT, excludedNodes)); + 
assertEquals("excluded nodes without ~ scope should be considered", 2, + cluster.countNumOfAvailableNodes("~" + deadNode.getNetworkLocation(), + excludedNodes)); + assertEquals("excluded nodes with rack scope should be considered", 1, + cluster.countNumOfAvailableNodes(deadNode.getNetworkLocation(), + excludedNodes)); + // adding the node in excluded scope to excluded list + excludedNodes.add(node2); + assertEquals("excluded nodes with ~ scope should be considered", 2, + cluster.countNumOfAvailableNodes("~" + deadNode.getNetworkLocation(), + excludedNodes)); + // getting count with non-exist scope. + assertEquals("No nodes should be considered for non-exist scope", 0, + cluster.countNumOfAvailableNodes("/non-exist", excludedNodes)); + } + + private NodeElement getNewNode(String name, String rackLocation) { + NodeElement node = new NodeElement(name); + node.setNetworkLocation(rackLocation); + return node; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java index 657fae3f52646..15bd9fe492404 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java @@ -104,8 +104,7 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[2]; testNodes[2] = dataNodes[3]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[2]); @@ -116,8 +115,7 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[4]; testNodes[2] = 
dataNodes[1]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); @@ -126,8 +124,7 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[2]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[2]); @@ -136,8 +133,7 @@ public void testSortByDistance() throws Exception { testNodes[1] = dataNodes[7]; testNodes[2] = dataNodes[2]; testNodes[3] = dataNodes[0]; - cluster.sortByDistance(computeNode, testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.sortByDistance(computeNode, testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[2]); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java index 4f83a57f3f69a..239b8414eb39c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestDelegationToken.java @@ -121,7 +121,7 @@ protected void removeStoredMasterKey(DelegationKey key) { @Override protected void storeNewToken(TestDelegationTokenIdentifier ident, - long renewDate) { + long renewDate) throws IOException { super.storeNewToken(ident, renewDate); isStoreNewTokenCalled = true; } @@ -135,7 +135,7 @@ protected void 
removeStoredToken(TestDelegationTokenIdentifier ident) @Override protected void updateStoredToken(TestDelegationTokenIdentifier ident, - long renewDate) { + long renewDate) throws IOException { super.updateStoredToken(ident, renewDate); isUpdateStoredTokenCalled = true; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java new file mode 100644 index 0000000000000..076c87ae689d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/TestZKDelegationTokenSecretManager.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.security.token.delegation; + +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenManager; +import org.junit.Assert; +import org.junit.Test; + +public class TestZKDelegationTokenSecretManager { + + private static final long DAY_IN_SECS = 86400; + + @Test + public void testZKDelTokSecretManager() throws Exception { + TestingServer zkServer = new TestingServer(); + DelegationTokenManager tm1, tm2 = null; + zkServer.start(); + try { + String connectString = zkServer.getConnectString(); + Configuration conf = new Configuration(); + conf.setBoolean(DelegationTokenManager.ENABLE_ZK_KEY, true); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_CONNECTION_STRING, connectString); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZNODE_WORKING_PATH, "testPath"); + conf.set(ZKDelegationTokenSecretManager.ZK_DTSM_ZK_AUTH_TYPE, "none"); + conf.setLong(DelegationTokenManager.UPDATE_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.MAX_LIFETIME, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.RENEW_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, DAY_IN_SECS); + tm1 = new DelegationTokenManager(conf, new Text("foo")); + tm1.init(); + tm2 = new DelegationTokenManager(conf, new Text("foo")); + tm2.init(); + + Token token = + tm1.createToken(UserGroupInformation.getCurrentUser(), "foo"); + Assert.assertNotNull(token); + tm2.verifyToken(token); + + token = tm2.createToken(UserGroupInformation.getCurrentUser(), "bar"); + Assert.assertNotNull(token); + tm1.verifyToken(token); + } finally { + zkServer.close(); + } + } +} diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestDelegationTokenManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestDelegationTokenManager.java index 4a0e8342f21b8..496b762bc090d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestDelegationTokenManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/token/delegation/web/TestDelegationTokenManager.java @@ -17,27 +17,28 @@ */ package org.apache.hadoop.security.token.delegation.web; +import java.io.IOException; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.util.StringUtils; import org.junit.Assert; import org.junit.Test; -import java.io.IOException; -import java.net.InetAddress; -import java.net.InetSocketAddress; -import java.util.Arrays; - public class TestDelegationTokenManager { private static final long DAY_IN_SECS = 86400; @Test public void testDTManager() throws Exception { - DelegationTokenManager tm = new DelegationTokenManager(new Text("foo"), - DAY_IN_SECS, DAY_IN_SECS, DAY_IN_SECS, DAY_IN_SECS); + Configuration conf = new Configuration(false); + conf.setLong(DelegationTokenManager.UPDATE_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.MAX_LIFETIME, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.RENEW_INTERVAL, DAY_IN_SECS); + conf.setLong(DelegationTokenManager.REMOVAL_SCAN_INTERVAL, DAY_IN_SECS); + DelegationTokenManager tm = + new DelegationTokenManager(conf, new Text("foo")); tm.init(); Token token = tm.createToken(UserGroupInformation.getCurrentUser(), "foo"); diff --git a/hadoop-common-project/hadoop-kms/pom.xml b/hadoop-common-project/hadoop-kms/pom.xml index 2c225cb18eb67..e6b21aad6ce45 100644 --- 
a/hadoop-common-project/hadoop-kms/pom.xml +++ b/hadoop-common-project/hadoop-kms/pom.xml @@ -187,6 +187,11 @@ metrics-core compile + + org.apache.curator + curator-test + test + diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml b/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml index 24a46b86ec49a..1d5b649c83dd4 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-acls.xml @@ -23,7 +23,7 @@ * ACL for create-key operations. - If the user does is not in the GET ACL, the key material is not returned + If the user is not in the GET ACL, the key material is not returned as part of the response. @@ -58,7 +58,7 @@ hadoop.kms.acl.GET_KEYS * - ACL for get-keys operation. + ACL for get-keys operations. @@ -66,7 +66,7 @@ hadoop.kms.acl.GET_METADATA * - ACL for get-key-metadata an get-keys-metadata operations. + ACL for get-key-metadata and get-keys-metadata operations. @@ -74,7 +74,7 @@ hadoop.kms.acl.SET_KEY_MATERIAL * - Complimentary ACL for CREATE and ROLLOVER operation to allow the client + Complementary ACL for CREATE and ROLLOVER operations to allow the client to provide the key material when creating or rolling a key. @@ -83,7 +83,7 @@ hadoop.kms.acl.GENERATE_EEK * - ACL for generateEncryptedKey CryptoExtension operations + ACL for generateEncryptedKey CryptoExtension operations. @@ -91,7 +91,7 @@ hadoop.kms.acl.DECRYPT_EEK * - ACL for decrypt EncryptedKey CryptoExtension operations + ACL for decryptEncryptedKey CryptoExtension operations. 
diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml index 20896fc2873c1..a810ca44d2958 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml @@ -15,10 +15,12 @@ + - hadoop.security.key.provider.path + hadoop.kms.key.provider.uri jceks://file@/${user.home}/kms.keystore + URI of the backing KeyProvider for the KMS. @@ -26,14 +28,52 @@ hadoop.security.keystore.JavaKeyStoreProvider.password none + If using the JavaKeyStoreProvider, the password for the keystore file. + + + hadoop.kms.cache.enable + true + + Whether the KMS will act as a cache for the backing KeyProvider. + When the cache is enabled, operations like getKeyVersion, getMetadata, + and getCurrentKey will sometimes return cached data without consulting + the backing KeyProvider. Cached values are flushed when keys are deleted + or modified. + + + hadoop.kms.cache.timeout.ms + 600000 + + Expiry time for the KMS key version and key metadata cache, in + milliseconds. This affects getKeyVersion and getMetadata. + + + + + hadoop.kms.current.key.cache.timeout.ms + 30000 + + Expiry time for the KMS current key cache, in milliseconds. This + affects getCurrentKey operations. + + + + + + + hadoop.kms.audit.aggregation.window.ms 10000 + Duplicate audit log events within the aggregation window (specified in + ms) are quashed to reduce log traffic. A single message for aggregated + events is printed at the end of the window, along with a count of the + number of aggregated events. @@ -43,7 +83,8 @@ hadoop.kms.authentication.type simple - simple or kerberos + Authentication type for the KMS. Can be either "simple" + or "kerberos". @@ -51,6 +92,7 @@ hadoop.kms.authentication.kerberos.keytab ${user.home}/kms.keytab + Path to the keytab with credentials for the configured Kerberos principal. 
@@ -58,6 +100,8 @@ hadoop.kms.authentication.kerberos.principal HTTP/localhost + The Kerberos principal to use for the HTTP endpoint. + The principal must start with 'HTTP/' as per the Kerberos HTTP SPNEGO specification. @@ -65,6 +109,64 @@ hadoop.kms.authentication.kerberos.name.rules DEFAULT + Rules used to resolve Kerberos principal names. + + + + + + + hadoop.kms.authentication.signer.secret.provider + random + + Indicates how the secret to sign the authentication cookies will be + stored. Options are 'random' (default), 'string' and 'zookeeper'. + If using a setup with multiple KMS instances, 'zookeeper' should be used. + + + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.path + /hadoop-kms/hadoop-auth-signature-secret + + The Zookeeper ZNode path where the KMS instances will store and retrieve + the secret from. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string + #HOSTNAME#:#PORT#,... + + The Zookeeper connection string, a list of hostnames and port comma + separated. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type + kerberos + + The Zookeeper authentication type, 'none' or 'sasl' (Kerberos). + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to Zookeeper. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to Zookeeper. 
diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java index dc55a8459cf40..7ff76e54ffb8c 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAudit.java @@ -103,9 +103,17 @@ public static enum OpStatus { private static Logger AUDIT_LOG = LoggerFactory.getLogger(KMS_LOGGER_NAME); - KMSAudit(long delay) { + /** + * Create a new KMSAudit. + * + * @param windowMs Duplicate events within the aggregation window are quashed + * to reduce log traffic. A single message for aggregated + * events is printed at the end of the window, along with a + * count of the number of aggregated events. + */ + KMSAudit(long windowMs) { cache = CacheBuilder.newBuilder() - .expireAfterWrite(delay, TimeUnit.MILLISECONDS) + .expireAfterWrite(windowMs, TimeUnit.MILLISECONDS) .removalListener( new RemovalListener() { @Override @@ -126,7 +134,7 @@ public void onRemoval( public void run() { cache.cleanUp(); } - }, delay / 10, delay / 10, TimeUnit.MILLISECONDS); + }, windowMs / 10, windowMs / 10, TimeUnit.MILLISECONDS); } private void logEvent(AuditEvent event) { diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java index 4df6db5408413..79652f35ad2d3 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java @@ -46,7 +46,8 @@ @InterfaceAudience.Private public class KMSAuthenticationFilter extends 
DelegationTokenAuthenticationFilter { - private static final String CONF_PREFIX = KMSConfiguration.CONFIG_PREFIX + + + public static final String CONFIG_PREFIX = KMSConfiguration.CONFIG_PREFIX + "authentication."; @Override @@ -56,9 +57,9 @@ protected Properties getConfiguration(String configPrefix, Configuration conf = KMSWebApp.getConfiguration(); for (Map.Entry entry : conf) { String name = entry.getKey(); - if (name.startsWith(CONF_PREFIX)) { + if (name.startsWith(CONFIG_PREFIX)) { String value = conf.get(name); - name = name.substring(CONF_PREFIX.length()); + name = name.substring(CONFIG_PREFIX.length()); props.setProperty(name, value); } } diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java index f02811993f729..bd61ca7edf5cc 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java @@ -40,6 +40,10 @@ public class KMSConfiguration { public static final String KEY_ACL_PREFIX = "key.acl."; public static final String DEFAULT_KEY_ACL_PREFIX = "default.key.acl."; + // Property to set the backing KeyProvider + public static final String KEY_PROVIDER_URI = CONFIG_PREFIX + + "key.provider.uri"; + // Property to Enable/Disable Caching public static final String KEY_CACHE_ENABLE = CONFIG_PREFIX + "cache.enable"; @@ -50,8 +54,8 @@ public class KMSConfiguration { public static final String CURR_KEY_CACHE_TIMEOUT_KEY = CONFIG_PREFIX + "current.key.cache.timeout.ms"; // Delay for Audit logs that need aggregation - public static final String KMS_AUDIT_AGGREGATION_DELAY = CONFIG_PREFIX + - "aggregation.delay.ms"; + public static final String KMS_AUDIT_AGGREGATION_WINDOW = CONFIG_PREFIX + + "audit.aggregation.window.ms"; 
public static final boolean KEY_CACHE_ENABLE_DEFAULT = true; // 10 mins @@ -59,7 +63,7 @@ public class KMSConfiguration { // 30 secs public static final long CURR_KEY_CACHE_TIMEOUT_DEFAULT = 30 * 1000; // 10 secs - public static final long KMS_AUDIT_AGGREGATION_DELAY_DEFAULT = 10000; + public static final long KMS_AUDIT_AGGREGATION_WINDOW_DEFAULT = 10000; // Property to Enable/Disable per Key authorization public static final String KEY_AUTHORIZATION_ENABLE = CONFIG_PREFIX + diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSExceptionsProvider.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSExceptionsProvider.java index 77b78ee783c1c..5cb088567c920 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSExceptionsProvider.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSExceptionsProvider.java @@ -79,7 +79,7 @@ public Response toResponse(Exception exception) { // we don't audit here because we did it already when checking access doAudit = false; } else if (throwable instanceof AuthorizationException) { - status = Response.Status.UNAUTHORIZED; + status = Response.Status.FORBIDDEN; // we don't audit here because we did it already when checking access doAudit = false; } else if (throwable instanceof AccessControlException) { diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java index 0827b78286e4b..325f8db27a000 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java @@ -39,6 +39,7 @@ import javax.servlet.ServletContextListener; import 
java.io.File; +import java.net.URI; import java.net.URL; import java.util.List; @@ -147,8 +148,8 @@ public void contextInitialized(ServletContextEvent sce) { kmsAudit = new KMSAudit(kmsConf.getLong( - KMSConfiguration.KMS_AUDIT_AGGREGATION_DELAY, - KMSConfiguration.KMS_AUDIT_AGGREGATION_DELAY_DEFAULT)); + KMSConfiguration.KMS_AUDIT_AGGREGATION_WINDOW, + KMSConfiguration.KMS_AUDIT_AGGREGATION_WINDOW_DEFAULT)); // this is required for the the JMXJsonServlet to work properly. // the JMXJsonServlet is behind the authentication filter, @@ -159,17 +160,12 @@ public void contextInitialized(ServletContextEvent sce) { new AccessControlList(AccessControlList.WILDCARD_ACL_VALUE)); // intializing the KeyProvider - - List providers = KeyProviderFactory.getProviders(kmsConf); - if (providers.isEmpty()) { + String providerString = kmsConf.get(KMSConfiguration.KEY_PROVIDER_URI); + if (providerString == null) { throw new IllegalStateException("No KeyProvider has been defined"); } - if (providers.size() > 1) { - LOG.warn("There is more than one KeyProvider configured '{}', using " + - "the first provider", - kmsConf.get(KeyProviderFactory.KEY_PROVIDER_PATH)); - } - KeyProvider keyProvider = providers.get(0); + KeyProvider keyProvider = + KeyProviderFactory.get(new URI(providerString), kmsConf); if (kmsConf.getBoolean(KMSConfiguration.KEY_CACHE_ENABLE, KMSConfiguration.KEY_CACHE_ENABLE_DEFAULT)) { long keyTimeOutMillis = diff --git a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm index d70f2a6d62ed3..e32893b377f1c 100644 --- a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm @@ -51,7 +51,7 @@ Hadoop Key Management Server (KMS) - Documentation Sets ${project.version} +---+ - hadoop.security.key.provider.path + hadoop.kms.key.provider.uri jceks://file@/${user.home}/kms.keystore @@ -448,16 +448,16 @@ $ keytool -genkey -alias tomcat -keyalg 
RSA KMS supports access control for all non-read operations at the Key level. All Key Access operations are classified as : - * MANAGEMENT - createKey, deleteKey, rolloverNewVersion + * MANAGEMENT - createKey, deleteKey, rolloverNewVersion - * GENERATE_EEK - generateEncryptedKey, warmUpEncryptedKeys + * GENERATE_EEK - generateEncryptedKey, warmUpEncryptedKeys - * DECRYPT_EEK - decryptEncryptedKey; + * DECRYPT_EEK - decryptEncryptedKey - * READ - getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, - getCurrentKey; + * READ - getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, + getCurrentKey - * ALL - all of the above; + * ALL - all of the above These can be defined in the KMS <<>> as follows @@ -554,40 +554,147 @@ $ keytool -genkey -alias tomcat -keyalg RSA KMS delegation token secret manager can be configured with the following properties: - +---+ - - hadoop.kms.authentication.delegation-token.update-interval.sec - 86400 - - How often the master key is rotated, in seconds. Default value 1 day. - - - - - hadoop.kms.authentication.delegation-token.max-lifetime.sec - 604800 - - Maximum lifetime of a delagation token, in seconds. Default value 7 days. - - - - - hadoop.kms.authentication.delegation-token.renew-interval.sec - 86400 - - Renewal interval of a delagation token, in seconds. Default value 1 day. - - - - - hadoop.kms.authentication.delegation-token.removal-scan-interval.sec - 3600 - - Scan interval to remove expired delegation tokens. - - - +---+ ++---+ + + hadoop.kms.authentication.delegation-token.update-interval.sec + 86400 + + How often the master key is rotated, in seconds. Default value 1 day. + + + + + hadoop.kms.authentication.delegation-token.max-lifetime.sec + 604800 + + Maximum lifetime of a delagation token, in seconds. Default value 7 days. + + + + + hadoop.kms.authentication.delegation-token.renew-interval.sec + 86400 + + Renewal interval of a delagation token, in seconds. Default value 1 day. 
+ + + + + hadoop.kms.authentication.delegation-token.removal-scan-interval.sec + 3600 + + Scan interval to remove expired delegation tokens. + + ++---+ + + +** Using Multiple Instances of KMS Behind a Load-Balancer or VIP + + KMS supports multiple KMS instances behind a load-balancer or VIP for + scalability and for HA purposes. + + When using multiple KMS instances behind a load-balancer or VIP, requests from + the same user may be handled by different KMS instances. + + KMS instances behind a load-balancer or VIP must be specially configured to + work properly as a single logical service. + +*** HTTP Kerberos Principals Configuration + + When KMS instances are behind a load-balancer or VIP, clients will use the + hostname of the VIP. For Kerberos SPNEGO authentication, the hostname of the + URL is used to construct the Kerberos service name of the server, + <<>>. This means that all KMS instances must have a Kerberos + service name with the load-balancer or VIP hostname. + + In order to be able to access directly a specific KMS instance, the KMS + instance must also have Keberos service name with its own hostname. This is + required for monitoring and admin purposes. + + Both Kerberos service principal credentials (for the load-balancer/VIP + hostname and for the actual KMS instance hostname) must be in the keytab file + configured for authentication. And the principal name specified in the + configuration must be '*'. For example: + ++---+ + + hadoop.kms.authentication.kerberos.principal + * + ++---+ + + <> If using HTTPS, the SSL certificate used by the KMS instance must + be configured to support multiple hostnames (see Java 7 + <<>> SAN extension support for details on how to do this). + +*** HTTP Authentication Signature + + KMS uses Hadoop Authentication for HTTP authentication. Hadoop Authentication + issues a signed HTTP Cookie once the client has authenticated successfully. 
+ This HTTP Cookie has an expiration time, after which it will trigger a new + authentication sequence. This is done to avoid triggering the authentication + on every HTTP request of a client. + + A KMS instance must verify the HTTP Cookie signatures signed by other KMS + instances. To do this all KMS instances must share the signing secret. + + This secret sharing can be done using a Zookeeper service which is configured + in KMS with the following properties in the <<>>: + ++---+ + + hadoop.kms.authentication.signer.secret.provider + zookeeper + + Indicates how the secret to sign the authentication cookies will be + stored. Options are 'random' (default), 'string' and 'zookeeper'. + If using a setup with multiple KMS instances, 'zookeeper' should be used. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.path + /hadoop-kms/hadoop-auth-signature-secret + + The Zookeeper ZNode path where the KMS instances will store and retrieve + the secret from. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string + #HOSTNAME#:#PORT#,... + + The Zookeeper connection string, a list of hostnames and port comma + separated. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type + kerberos + + The Zookeeper authentication type, 'none' or 'sasl' (Kerberos). + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to Zookeeper. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to Zookeeper. 
+ + ++---+ + +*** Delegation Tokens + TBD ** KMS HTTP REST API diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java index f64dcf0e1aa8e..697d7ec6d62f3 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java @@ -166,10 +166,9 @@ public void start() throws Exception { File kmsFile = new File(kmsConfDir, "kms-site.xml"); if (!kmsFile.exists()) { Configuration kms = new Configuration(false); - kms.set("hadoop.security.key.provider.path", + kms.set(KMSConfiguration.KEY_PROVIDER_URI, "jceks://file@" + new Path(kmsConfDir, "kms.keystore").toUri()); kms.set("hadoop.kms.authentication.type", "simple"); - kms.setBoolean(KMSConfiguration.KEY_AUTHORIZATION_ENABLE, false); Writer writer = new FileWriter(kmsFile); kms.writeXml(writer); writer.close(); diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index f4f9fead63e58..921141766a3c7 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -117,13 +117,14 @@ protected void runServer(String keystore, String password, File confDir, protected Configuration createBaseKMSConf(File keyStoreDir) throws Exception { Configuration conf = new Configuration(false); - conf.set("hadoop.security.key.provider.path", + conf.set(KMSConfiguration.KEY_PROVIDER_URI, "jceks://file@" + new Path(keyStoreDir.getAbsolutePath(), "kms.keystore").toUri()); conf.set("hadoop.kms.authentication.type", "simple"); 
return conf; } - protected void writeConf(File confDir, Configuration conf) throws Exception { + public static void writeConf(File confDir, Configuration conf) + throws Exception { Writer writer = new FileWriter(new File(confDir, KMSConfiguration.KMS_SITE_XML)); conf.writeXml(writer); @@ -139,7 +140,7 @@ protected void writeConf(File confDir, Configuration conf) throws Exception { writer.close(); } - protected URI createKMSUri(URL kmsUrl) throws Exception { + public static URI createKMSUri(URL kmsUrl) throws Exception { String str = kmsUrl.toString(); str = str.replaceFirst("://", "@"); return new URI("kms://" + str); diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java new file mode 100644 index 0000000000000..3a02a0a281047 --- /dev/null +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.crypto.key.kms.server; + +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion; +import org.apache.hadoop.crypto.key.KeyProvider.Options; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; +import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension; +import org.apache.hadoop.crypto.key.kms.KMSClientProvider; +import org.apache.hadoop.crypto.key.kms.KMSRESTConstants; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.server.AuthenticationFilter; +import org.apache.hadoop.security.authentication.util.ZKSignerSecretProvider; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.LoginContext; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.net.HttpURLConnection; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.SocketTimeoutException; +import java.net.URI; +import java.net.URL; +import java.security.Principal; 
+import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.Callable; + +public class TestKMSWithZK { + + protected Configuration createBaseKMSConf(File keyStoreDir) throws Exception { + Configuration conf = new Configuration(false); + conf.set(KMSConfiguration.KEY_PROVIDER_URI, + "jceks://file@" + new Path(keyStoreDir.getAbsolutePath(), + "kms.keystore").toUri()); + conf.set("hadoop.kms.authentication.type", "simple"); + conf.setBoolean(KMSConfiguration.KEY_AUTHORIZATION_ENABLE, false); + + conf.set(KMSACLs.Type.GET_KEYS.getAclConfigKey(), "foo"); + return conf; + } + + @Test + public void testMultipleKMSInstancesWithZKSigner() throws Exception { + final File testDir = TestKMS.getTestDir(); + Configuration conf = createBaseKMSConf(testDir); + + TestingServer zkServer = new TestingServer(); + zkServer.start(); + + MiniKMS kms1 = null; + MiniKMS kms2 = null; + + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + AuthenticationFilter.SIGNER_SECRET_PROVIDER, "zookeeper"); + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + ZKSignerSecretProvider.ZOOKEEPER_CONNECTION_STRING, + zkServer.getConnectString()); + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + ZKSignerSecretProvider.ZOOKEEPER_PATH, "/secret"); + TestKMS.writeConf(testDir, conf); + + try { + kms1 = new MiniKMS.Builder() + .setKmsConfDir(testDir).setLog4jConfFile("log4j.properties").build(); + kms1.start(); + + kms2 = new MiniKMS.Builder() + .setKmsConfDir(testDir).setLog4jConfFile("log4j.properties").build(); + kms2.start(); + + final URL url1 = new URL(kms1.getKMSUrl().toExternalForm() + + KMSRESTConstants.SERVICE_VERSION + "/" + + KMSRESTConstants.KEYS_NAMES_RESOURCE); + final URL url2 = new URL(kms2.getKMSUrl().toExternalForm() + + 
KMSRESTConstants.SERVICE_VERSION + "/" + + KMSRESTConstants.KEYS_NAMES_RESOURCE); + + final DelegationTokenAuthenticatedURL.Token token = + new DelegationTokenAuthenticatedURL.Token(); + final DelegationTokenAuthenticatedURL aUrl = + new DelegationTokenAuthenticatedURL(); + + UserGroupInformation ugiFoo = UserGroupInformation.createUserForTesting( + "foo", new String[]{"gfoo"}); + UserGroupInformation ugiBar = UserGroupInformation.createUserForTesting( + "bar", new String[]{"gBar"}); + + ugiFoo.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + HttpURLConnection conn = aUrl.openConnection(url1, token); + Assert.assertEquals(HttpURLConnection.HTTP_OK, + conn.getResponseCode()); + return null; + } + }); + + ugiBar.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + HttpURLConnection conn = aUrl.openConnection(url2, token); + Assert.assertEquals(HttpURLConnection.HTTP_OK, + conn.getResponseCode()); + return null; + } + }); + + ugiBar.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + final DelegationTokenAuthenticatedURL.Token emptyToken = + new DelegationTokenAuthenticatedURL.Token(); + HttpURLConnection conn = aUrl.openConnection(url2, emptyToken); + Assert.assertEquals(HttpURLConnection.HTTP_FORBIDDEN, + conn.getResponseCode()); + return null; + } + }); + + } finally { + if (kms2 != null) { + kms2.stop(); + } + if (kms1 != null) { + kms1.stop(); + } + zkServer.stop(); + } + + } + +} diff --git a/hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java b/hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java index d3ea2e70cfa2a..7107b75aaef78 100644 --- a/hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java +++ b/hadoop-common-project/hadoop-minikdc/src/main/java/org/apache/hadoop/minikdc/MiniKdc.java @@ -70,6 +70,7 @@ import 
java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.Set; @@ -109,6 +110,11 @@ */ public class MiniKdc { + public static final String JAVA_SECURITY_KRB5_CONF = + "java.security.krb5.conf"; + public static final String SUN_SECURITY_KRB5_DEBUG = + "sun.security.krb5.debug"; + public static void main(String[] args) throws Exception { if (args.length < 4) { System.out.println("Arguments: " + @@ -266,7 +272,8 @@ public MiniKdc(Properties conf, File workDir) throws Exception { } String orgName= conf.getProperty(ORG_NAME); String orgDomain = conf.getProperty(ORG_DOMAIN); - realm = orgName.toUpperCase() + "." + orgDomain.toUpperCase(); + realm = orgName.toUpperCase(Locale.ENGLISH) + "." + + orgDomain.toUpperCase(Locale.ENGLISH); } /** @@ -355,8 +362,8 @@ private void initDirectoryService() throws Exception { ds.addLast(new KeyDerivationInterceptor()); // create one partition - String orgName= conf.getProperty(ORG_NAME).toLowerCase(); - String orgDomain = conf.getProperty(ORG_DOMAIN).toLowerCase(); + String orgName= conf.getProperty(ORG_NAME).toLowerCase(Locale.ENGLISH); + String orgDomain = conf.getProperty(ORG_DOMAIN).toLowerCase(Locale.ENGLISH); JdbmPartition partition = new JdbmPartition(ds.getSchemaManager()); partition.setId(orgName); @@ -387,10 +394,10 @@ private void initKDCServer() throws Exception { String orgDomain = conf.getProperty(ORG_DOMAIN); String bindAddress = conf.getProperty(KDC_BIND_ADDRESS); final Map map = new HashMap(); - map.put("0", orgName.toLowerCase()); - map.put("1", orgDomain.toLowerCase()); - map.put("2", orgName.toUpperCase()); - map.put("3", orgDomain.toUpperCase()); + map.put("0", orgName.toLowerCase(Locale.ENGLISH)); + map.put("1", orgDomain.toLowerCase(Locale.ENGLISH)); + map.put("2", orgName.toUpperCase(Locale.ENGLISH)); + map.put("3", orgDomain.toUpperCase(Locale.ENGLISH)); map.put("4", bindAddress); ClassLoader cl = 
Thread.currentThread().getContextClassLoader(); @@ -455,9 +462,9 @@ private void initKDCServer() throws Exception { FileUtils.writeStringToFile(krb5conf, MessageFormat.format(sb.toString(), getRealm(), getHost(), Integer.toString(getPort()), System.getProperty("line.separator"))); - System.setProperty("java.security.krb5.conf", krb5conf.getAbsolutePath()); + System.setProperty(JAVA_SECURITY_KRB5_CONF, krb5conf.getAbsolutePath()); - System.setProperty("sun.security.krb5.debug", conf.getProperty(DEBUG, + System.setProperty(SUN_SECURITY_KRB5_DEBUG, conf.getProperty(DEBUG, "false")); // refresh the config @@ -481,8 +488,8 @@ private void initKDCServer() throws Exception { */ public synchronized void stop() { if (kdc != null) { - System.getProperties().remove("java.security.krb5.conf"); - System.getProperties().remove("sun.security.krb5.debug"); + System.getProperties().remove(JAVA_SECURITY_KRB5_CONF); + System.getProperties().remove(SUN_SECURITY_KRB5_DEBUG); kdc.stop(); try { ds.shutdown(); @@ -520,8 +527,8 @@ public synchronized void createPrincipal(String principal, String password) throws Exception { String orgName= conf.getProperty(ORG_NAME); String orgDomain = conf.getProperty(ORG_DOMAIN); - String baseDn = "ou=users,dc=" + orgName.toLowerCase() + ",dc=" + - orgDomain.toLowerCase(); + String baseDn = "ou=users,dc=" + orgName.toLowerCase(Locale.ENGLISH) + + ",dc=" + orgDomain.toLowerCase(Locale.ENGLISH); String content = "dn: uid=" + principal + "," + baseDn + "\n" + "objectClass: top\n" + "objectClass: person\n" + diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java index f2d26115d104d..aad20e0bec37f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/DFSClientCache.java @@ -241,7 +241,7 @@ private CacheLoader inputStreamLoader( public FSDataInputStream load(DFSInputStreamCaheKey key) throws Exception { DFSClient client = getDfsClient(key.userId); DFSInputStream dis = client.open(key.inodePath); - return new FSDataInputStream(dis); + return client.createWrappedInputStream(dis); } }; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java index ede65c62837bc..71908d8653032 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java @@ -678,7 +678,7 @@ private WRITE3Response processPerfectOverWrite(DFSClient dfsClient, } try { - fis = new FSDataInputStream(dfsClient.open(path)); + fis = dfsClient.createWrappedInputStream(dfsClient.open(path)); readCount = fis.read(offset, readbuffer, 0, count); if (readCount < count) { LOG.error("Can't read back " + count + " bytes, partial read size:" diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index 0d591d6396330..70c37d86cbfb9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -922,8 +922,9 @@ preOpDirAttr, new WccData(Nfs3Utils.getWccAttr(preOpDirAttr), EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE) : EnumSet.of(CreateFlag.CREATE); - fos = new HdfsDataOutputStream(dfsClient.create(fileIdPath, permission, - flag, false, replication, 
blockSize, null, bufferSize, null), + fos = dfsClient.createWrappedOutputStream( + dfsClient.create(fileIdPath, permission, flag, false, replication, + blockSize, null, bufferSize, null), statistics); if ((createMode == Nfs3Constant.CREATE_UNCHECKED) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java index 05b976da8be3a..acd47fb96c90b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestRpcProgramNfs3.java @@ -17,19 +17,28 @@ */ package org.apache.hadoop.hdfs.nfs.nfs3; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.EOFException; +import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.EnumSet; +import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; +import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.client.HdfsAdmin; import org.apache.hadoop.hdfs.nfs.conf.NfsConfigKeys; import org.apache.hadoop.hdfs.nfs.conf.NfsConfiguration; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -98,12 +107,16 @@ public class TestRpcProgramNfs3 { static DistributedFileSystem hdfs; static MiniDFSCluster cluster = null; static NfsConfiguration config = new 
NfsConfiguration(); + static HdfsAdmin dfsAdmin; static NameNode nn; static Nfs3 nfs; static RpcProgramNfs3 nfsd; static SecurityHandler securityHandler; static SecurityHandler securityHandlerUnpriviledged; static String testdir = "/tmp"; + private static final String TEST_KEY = "testKey"; + private static FileSystemTestHelper fsHelper; + private static File testRootDir; @BeforeClass public static void setup() throws Exception { @@ -114,12 +127,20 @@ public static void setup() throws Exception { .getProxySuperuserGroupConfKey(currentUser), "*"); config.set(DefaultImpersonationProvider.getTestProvider() .getProxySuperuserIpConfKey(currentUser), "*"); + fsHelper = new FileSystemTestHelper(); + // Set up java key store + String testRoot = fsHelper.getTestRootDir(); + testRootDir = new File(testRoot).getAbsoluteFile(); + final Path jksPath = new Path(testRootDir.toString(), "test.jks"); + config.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, + JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri()); ProxyUsers.refreshSuperUserGroupsConfiguration(config); cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build(); cluster.waitActive(); hdfs = cluster.getFileSystem(); nn = cluster.getNameNode(); + dfsAdmin = new HdfsAdmin(cluster.getURI(), config); // Use ephemeral ports in case tests are running in parallel config.setInt("nfs3.mountd.port", 0); @@ -131,6 +152,8 @@ public static void setup() throws Exception { nfs.startServiceInternal(false); nfsd = (RpcProgramNfs3) nfs.getRpcProgram(); + hdfs.getClient().setKeyProvider(nn.getNamesystem().getProvider()); + DFSTestUtil.createKey(TEST_KEY, cluster, config); // Mock SecurityHandler which returns system user.name securityHandler = Mockito.mock(SecurityHandler.class); @@ -310,6 +333,105 @@ public void testRead() throws Exception { response2.getStatus()); } + @Test(timeout = 120000) + public void testEncryptedReadWrite() throws Exception { + final int len = 8192; + + final Path zone = new Path("/zone"); + 
hdfs.mkdirs(zone); + dfsAdmin.createEncryptionZone(zone, TEST_KEY); + + final byte[] buffer = new byte[len]; + for (int i = 0; i < len; i++) { + buffer[i] = (byte) i; + } + + final String encFile1 = "/zone/myfile"; + createFileUsingNfs(encFile1, buffer); + commit(encFile1, len); + assertArrayEquals("encFile1 not equal", + getFileContentsUsingNfs(encFile1, len), + getFileContentsUsingDfs(encFile1, len)); + + /* + * Same thing except this time create the encrypted file using DFS. + */ + final String encFile2 = "/zone/myfile2"; + final Path encFile2Path = new Path(encFile2); + DFSTestUtil.createFile(hdfs, encFile2Path, len, (short) 1, 0xFEED); + assertArrayEquals("encFile2 not equal", + getFileContentsUsingNfs(encFile2, len), + getFileContentsUsingDfs(encFile2, len)); + } + + private void createFileUsingNfs(String fileName, byte[] buffer) + throws Exception { + DFSTestUtil.createFile(hdfs, new Path(fileName), 0, (short) 1, 0); + + final HdfsFileStatus status = nn.getRpcServer().getFileInfo(fileName); + final long dirId = status.getFileId(); + final FileHandle handle = new FileHandle(dirId); + + final WRITE3Request writeReq = new WRITE3Request(handle, 0, + buffer.length, WriteStableHow.DATA_SYNC, ByteBuffer.wrap(buffer)); + final XDR xdr_req = new XDR(); + writeReq.serialize(xdr_req); + + final WRITE3Response response = nfsd.write(xdr_req.asReadOnlyWrap(), + null, 1, securityHandler, + new InetSocketAddress("localhost", 1234)); + assertEquals("Incorrect response: ", null, response); + } + + private byte[] getFileContentsUsingNfs(String fileName, int len) + throws Exception { + final HdfsFileStatus status = nn.getRpcServer().getFileInfo(fileName); + final long dirId = status.getFileId(); + final FileHandle handle = new FileHandle(dirId); + + final READ3Request readReq = new READ3Request(handle, 0, len); + final XDR xdr_req = new XDR(); + readReq.serialize(xdr_req); + + final READ3Response response = nfsd.read(xdr_req.asReadOnlyWrap(), + securityHandler, new 
InetSocketAddress("localhost", 1234)); + assertEquals("Incorrect return code: ", Nfs3Status.NFS3_OK, + response.getStatus()); + assertTrue("expected full read", response.isEof()); + return response.getData().array(); + } + + private byte[] getFileContentsUsingDfs(String fileName, int len) + throws Exception { + final FSDataInputStream in = hdfs.open(new Path(fileName)); + final byte[] ret = new byte[len]; + in.readFully(ret); + try { + in.readByte(); + Assert.fail("expected end of file"); + } catch (EOFException e) { + // expected. Unfortunately there is no associated message to check + } + in.close(); + return ret; + } + + private void commit(String fileName, int len) throws Exception { + final HdfsFileStatus status = nn.getRpcServer().getFileInfo(fileName); + final long dirId = status.getFileId(); + final FileHandle handle = new FileHandle(dirId); + final XDR xdr_req = new XDR(); + final COMMIT3Request req = new COMMIT3Request(handle, 0, len); + req.serialize(xdr_req); + + Channel ch = Mockito.mock(Channel.class); + + COMMIT3Response response2 = nfsd.commit(xdr_req.asReadOnlyWrap(), + ch, 1, securityHandler, + new InetSocketAddress("localhost", 1234)); + assertEquals("Incorrect COMMIT3Response:", null, response2); + } + @Test(timeout = 60000) public void testWrite() throws Exception { HdfsFileStatus status = nn.getRpcServer().getFileInfo("/tmp/bar"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 752e77869cae8..9389d370aae0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -18,6 +18,9 @@ Trunk (Unreleased) HDFS-3125. Add JournalService to enable Journal Daemon. (suresh) + HDFS-6584. Support archival storage. (See breakdown of tasks below for + features and contributors) + IMPROVEMENTS HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common. @@ -130,6 +133,9 @@ Trunk (Unreleased) HDFS-6609. 
Use DirectorySnapshottableFeature to represent a snapshottable directory. (Jing Zhao via wheat9) + HDFS-6987. Move CipherSuite xattr information up to the encryption zone + root. (Zhe Zhang via wang) + OPTIMIZATIONS BUG FIXES @@ -261,6 +267,84 @@ Trunk (Unreleased) HDFS-6981. Fix DN upgrade with layout version change. (Arpit Agarwal) + BREAKDOWN OF HDFS-6584 ARCHIVAL STORAGE + + HDFS-6677. Change INodeFile and FSImage to support storage policy ID. + (szetszwo) + + HDFS-6670. Add block storage policy support with default HOT, WARM and COLD + policies. (szetszwo) + + HDFS-6671. Change BlockPlacementPolicy to consider block storage policy + in replicaiton. (szetszwo) + + HDFS-6710. Change BlockPlacementPolicy to consider block storage policy + in replica deletion. (szetszwo) + + HDFS-6679. Bump NameNodeLayoutVersion and update editsStored test files. + (vinayakumarb via szetszwo) + + HDFS-6686. Change BlockPlacementPolicy to use fallback when some storage + types are unavailable. (szetszwo) + + HDFS-6835. Add a new API to set storage policy. (jing9) + + HDFS-6847. Support storage policy on directories and include storage policy + in HdfsFileStatus. (Jing Zhao via szetszwo) + + HDFS-6801. Add a new data migration tool, Mover, for archiving data. + (szetszwo via jing9) + + HDFS-6863. Support migration for snapshot paths. (jing9) + + HDFS-6906. Add more tests for BlockStoragePolicy. (szetszwo via jing9) + + HDFS-6911. check if a block is already scheduled in Mover. + (szetszwo via jing9) + + HDFS-6920. Check the storage type of delNodeHintStorage when deleting + a replica. (szetszwo via jing9) + + HDFS-6944. Add retry and termination logic for Mover. (jing9) + + HDFS-6969. INode#getStoragePolicyID should always return the latest + storage policy. (jing9) + + HDFS-6961. BlockPlacementPolicy#chooseTarget should check each valid + storage type in each choosing round. (jing9) + + HDFS-6876. support set/get storage policy in DFSAdmin. (jing9) + + HDFS-6997. 
Add more tests for data migration and replicaion. (szetszwo) + + HDFS-6875. Support migration for a list of specified paths. (jing9) + + HDFS-7027. Mover does not terminate when some storage type is out of space. + (szetszwo via jing9) + + HDFS-7029. Fix TestDFSInotifyEventInputStream and TestDistributedFileSystem. + (szetszwo via jing9) + + HDFS-7028. FSDirectory should not get storage policy id from symlinks. + (szetszwo) + + HDFS-7034. Fix TestBlockPlacement and TestStorageMover. (jing9) + + HDFS-7039. Fix Balancer tests. (szetszwo via jing9) + + HDFS-7062. Skip under construction block for migration. (jing9) + + HDFS-7052. Add Mover into hdfs script. (jing9) + + HDFS-7072. Fix TestBlockManager and TestStorageMover. (jing9 via szetszwo) + + HDFS-6864. Archival Storage: add user documentation. (szetszwo via jing9) + + HDFS-7088. Archival Storage: fix TestBalancer and + TestBalancerWithMultipleNameNodes. (szetszwo via jing9) + + HDFS-7095. TestStorageMover often fails in Jenkins. (jing9) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES @@ -468,6 +552,26 @@ Release 2.6.0 - UNRELEASED HDFS-6851. Refactor EncryptionZoneWithId and EncryptionZone. (clamb via wang) + HDFS-6705. Create an XAttr that disallows the HDFS admin from accessing a + file. (clamb via wang) + + HDFS-6843. Create FileStatus isEncrypted() method (clamb via cmccabe) + + HDFS-7004. Update KeyProvider instantiation to create by URI. (wang) + + HDFS-7047. Expose FileStatus#isEncrypted in libhdfs (cmccabe) + + HDFS-7003. Add NFS Gateway support for reading and writing to + encryption zones. (clamb via wang) + + HDFS-6727. Refresh data volumes on DataNode based on configuration changes + (Lei Xu via cmccabe) + + HDFS-6970. Move startFile EDEK retries to the DFSClient. (wang) + + HDFS-6948. DN rejects blocks if it has older UC block + (Eric Payne via kihwal) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. 
(wang) @@ -667,6 +771,30 @@ Release 2.6.0 - UNRELEASED and TestDFSClientFailover.testDoesntDnsResolveLogicalURI failing on jdk7. (Akira Ajisaka via wang) + HDFS-6912. SharedFileDescriptorFactory should not allocate sparse files + (cmccabe) + + HDFS-7075. hadoop-fuse-dfs fails because it cannot find + JavaKeyStoreProvider$Factory (cmccabe) + + HDFS-7078. Fix listEZs to work correctly with snapshots. (wang) + + HDFS-6840. Clients are always sent to the same datanode when read + is off rack. (wang) + + HDFS-7065. Pipeline close recovery race can cause block corruption (kihwal) + + HDFS-7096. Fix TestRpcProgramNfs3 to use DFS_ENCRYPTION_KEY_PROVIDER_URI + (clamb via cmccabe) + + HDFS-7046. HA NN can NPE upon transition to active. (kihwal) + + HDFS-7106. Reconfiguring DataNode volumes does not release the lock files + in removed volumes. (cnauroth via cmccabe) + + HDFS-7001. Tests in TestTracing depends on the order of execution + (iwasakims via cmccabe) + BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS HDFS-6387. HDFS CLI admin tool for creating & deleting an @@ -780,6 +908,24 @@ Release 2.6.0 - UNRELEASED HDFS-7051. TestDataNodeRollingUpgrade#isBlockFileInPrevious assumes Unix file path separator. (cnauroth) + HDFS-7105. Fix TestJournalNode#testFailToStartWithBadConfig to match log + output change. (Ray Chiang via cnauroth) + + HDFS-7105. Allow falling back to a non-SASL connection on + DataTransferProtocol in several edge cases. (cnauroth) + + HDFS-7107. Avoid Findbugs warning for synchronization on + AbstractNNFailoverProxyProvider#fallbackToSimpleAuth. (cnauroth) + + HDFS-7109. TestDataStorage does not release file locks between tests. + (cnauroth) + + HDFS-7110. Skip tests related to short-circuit read on platforms that do not + currently implement short-circuit read. (cnauroth) + + HDFS-7115. TestEncryptionZones assumes Unix path separator for KMS key store + path. 
(Xiaoyu Yao via cnauroth) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 2ba6d1b1a9c7b..22a0f0f8c0f3d 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -36,6 +36,8 @@ function hadoop_usage echo " journalnode run the DFS journalnode" echo " lsSnapshottableDir list all snapshottable dirs owned by the current user" echo " Use -help to see options" + echo " mover run a utility to move block replicas across" + echo " storage types" echo " namenode run the DFS namenode" echo " Use -format to initialize the DFS filesystem" echo " nfs3 run an NFS version 3 gateway" @@ -159,6 +161,11 @@ case ${COMMAND} in lsSnapshottableDir) CLASS=org.apache.hadoop.hdfs.tools.snapshot.LsSnapshottableDir ;; + mover) + CLASS=org.apache.hadoop.hdfs.server.mover.Mover + hadoop_debug "Appending HADOOP_MOVER_OPTS onto HADOOP_OPTS" + HADOOP_OPTS="${HADOOP_OPTS} ${HADOOP_MOVER_OPTS}" + ;; namenode) daemon="true" CLASS='org.apache.hadoop.hdfs.server.namenode.NameNode' diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd index f5f77f043d2f4..9fb84261e1c28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd @@ -47,7 +47,7 @@ if "%1" == "--config" ( goto print_usage ) - set hdfscommands=dfs namenode secondarynamenode journalnode zkfc datanode dfsadmin haadmin fsck balancer jmxget oiv oev fetchdt getconf groups snapshotDiff lsSnapshottableDir cacheadmin + set hdfscommands=dfs namenode secondarynamenode journalnode zkfc datanode dfsadmin haadmin fsck balancer jmxget oiv oev fetchdt getconf groups snapshotDiff lsSnapshottableDir cacheadmin mover for %%i in ( %hdfscommands% ) do ( if %hdfs-command% == %%i set hdfscommand=true ) @@ -150,6 +150,11 @@ goto :eof set 
CLASS=org.apache.hadoop.hdfs.tools.CacheAdmin goto :eof +:mover + set CLASS=org.apache.hadoop.hdfs.server.mover.Mover + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_MOVER_OPTS% + goto :eof + @rem This changes %1, %2 etc. Hence those cannot be used after calling this. :make_command_arguments if "%1" == "--config" ( @@ -198,6 +203,7 @@ goto :eof @echo lsSnapshottableDir list all snapshottable dirs owned by the current user @echo Use -help to see options @echo cacheadmin configure the HDFS cache + @echo mover run a utility to move block replicas across storage types @echo. @echo Most commands print help when invoked w/o parameters. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/blockStoragePolicy-site.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/blockStoragePolicy-site.xml new file mode 100644 index 0000000000000..04142ad80d05c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/blockStoragePolicy-site.xml @@ -0,0 +1,21 @@ + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/hdfs-site.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/hdfs-site.xml index 50ec1460bd60f..3a0b0ed88fa23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/hdfs-site.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/conf/hdfs-site.xml @@ -16,6 +16,7 @@ - + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java new file mode 100644 index 0000000000000..efbf8a00d6f9f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStoragePolicy.java @@ -0,0 +1,419 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs; + +import java.util.Arrays; +import java.util.EnumSet; +import java.util.LinkedList; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.XAttr; +import org.apache.hadoop.fs.XAttr.NameSpace; + +/** + * A block storage policy describes how to select the storage types + * for the replicas of a block. 
+ */ +@InterfaceAudience.Private +public class BlockStoragePolicy { + public static final Log LOG = LogFactory.getLog(BlockStoragePolicy.class); + + public static final String DFS_BLOCK_STORAGE_POLICIES_KEY + = "dfs.block.storage.policies"; + public static final String DFS_BLOCK_STORAGE_POLICY_KEY_PREFIX + = "dfs.block.storage.policy."; + public static final String DFS_BLOCK_STORAGE_POLICY_CREATION_FALLBACK_KEY_PREFIX + = "dfs.block.storage.policy.creation-fallback."; + public static final String DFS_BLOCK_STORAGE_POLICY_REPLICATION_FALLBACK_KEY_PREFIX + = "dfs.block.storage.policy.replication-fallback."; + public static final String STORAGE_POLICY_XATTR_NAME = "bsp"; + /** set the namespace to TRUSTED so that only privilege users can access */ + public static final NameSpace XAttrNS = NameSpace.TRUSTED; + + public static final int ID_BIT_LENGTH = 4; + public static final int ID_MAX = (1 << ID_BIT_LENGTH) - 1; + public static final byte ID_UNSPECIFIED = 0; + + private static final Suite DEFAULT_SUITE = createDefaultSuite(); + + private static Suite createDefaultSuite() { + final BlockStoragePolicy[] policies = new BlockStoragePolicy[1 << ID_BIT_LENGTH]; + final StorageType[] storageTypes = {StorageType.DISK}; + final byte defaultPolicyId = 12; + policies[defaultPolicyId] = new BlockStoragePolicy(defaultPolicyId, "HOT", + storageTypes, StorageType.EMPTY_ARRAY, StorageType.EMPTY_ARRAY); + return new Suite(defaultPolicyId, policies); + } + + /** A block storage policy suite. */ + public static class Suite { + private final byte defaultPolicyID; + private final BlockStoragePolicy[] policies; + + private Suite(byte defaultPolicyID, BlockStoragePolicy[] policies) { + this.defaultPolicyID = defaultPolicyID; + this.policies = policies; + } + + /** @return the corresponding policy. */ + public BlockStoragePolicy getPolicy(byte id) { + // id == 0 means policy not specified. + return id == 0? getDefaultPolicy(): policies[id]; + } + + /** @return the default policy. 
*/ + public BlockStoragePolicy getDefaultPolicy() { + return getPolicy(defaultPolicyID); + } + + public BlockStoragePolicy getPolicy(String policyName) { + if (policies != null) { + for (BlockStoragePolicy policy : policies) { + if (policy != null && policy.name.equals(policyName)) { + return policy; + } + } + } + return null; + } + } + + /** A 4-bit policy ID */ + private final byte id; + /** Policy name */ + private final String name; + + /** The storage types to store the replicas of a new block. */ + private final StorageType[] storageTypes; + /** The fallback storage type for block creation. */ + private final StorageType[] creationFallbacks; + /** The fallback storage type for replication. */ + private final StorageType[] replicationFallbacks; + + @VisibleForTesting + public BlockStoragePolicy(byte id, String name, StorageType[] storageTypes, + StorageType[] creationFallbacks, StorageType[] replicationFallbacks) { + this.id = id; + this.name = name; + this.storageTypes = storageTypes; + this.creationFallbacks = creationFallbacks; + this.replicationFallbacks = replicationFallbacks; + } + + /** + * @return a list of {@link StorageType}s for storing the replicas of a block. + */ + public List chooseStorageTypes(final short replication) { + final List types = new LinkedList(); + int i = 0; + for(; i < replication && i < storageTypes.length; i++) { + types.add(storageTypes[i]); + } + final StorageType last = storageTypes[storageTypes.length - 1]; + for(; i < replication; i++) { + types.add(last); + } + return types; + } + + /** + * Choose the storage types for storing the remaining replicas, given the + * replication number and the storage types of the chosen replicas. + * + * @param replication the replication number. + * @param chosen the storage types of the chosen replicas. + * @return a list of {@link StorageType}s for storing the replicas of a block. 
+ */ + public List chooseStorageTypes(final short replication, + final Iterable chosen) { + return chooseStorageTypes(replication, chosen, null); + } + + private List chooseStorageTypes(final short replication, + final Iterable chosen, final List excess) { + final List types = chooseStorageTypes(replication); + diff(types, chosen, excess); + return types; + } + + /** + * Choose the storage types for storing the remaining replicas, given the + * replication number, the storage types of the chosen replicas and + * the unavailable storage types. It uses fallback storage in case that + * the desired storage type is unavailable. + * + * @param replication the replication number. + * @param chosen the storage types of the chosen replicas. + * @param unavailables the unavailable storage types. + * @param isNewBlock Is it for new block creation? + * @return a list of {@link StorageType}s for storing the replicas of a block. + */ + public List chooseStorageTypes(final short replication, + final Iterable chosen, + final EnumSet unavailables, + final boolean isNewBlock) { + final List excess = new LinkedList(); + final List storageTypes = chooseStorageTypes( + replication, chosen, excess); + final int expectedSize = storageTypes.size() - excess.size(); + final List removed = new LinkedList(); + for(int i = storageTypes.size() - 1; i >= 0; i--) { + // replace/remove unavailable storage types. + final StorageType t = storageTypes.get(i); + if (unavailables.contains(t)) { + final StorageType fallback = isNewBlock? + getCreationFallback(unavailables) + : getReplicationFallback(unavailables); + if (fallback == null) { + removed.add(storageTypes.remove(i)); + } else { + storageTypes.set(i, fallback); + } + } + } + // remove excess storage types after fallback replacement. 
+ diff(storageTypes, excess, null); + if (storageTypes.size() < expectedSize) { + LOG.warn("Failed to place enough replicas: expected size is " + expectedSize + + " but only " + storageTypes.size() + " storage types can be selected " + + "(replication=" + replication + + ", selected=" + storageTypes + + ", unavailable=" + unavailables + + ", removed=" + removed + + ", policy=" + this + ")"); + } + return storageTypes; + } + + /** + * Compute the list difference t = t - c. + * Further, if e is not null, set e = e + c - t; + */ + private static void diff(List t, Iterable c, + List e) { + for(StorageType storagetype : c) { + final int i = t.indexOf(storagetype); + if (i >= 0) { + t.remove(i); + } else if (e != null) { + e.add(storagetype); + } + } + } + + /** + * Choose excess storage types for deletion, given the + * replication number and the storage types of the chosen replicas. + * + * @param replication the replication number. + * @param chosen the storage types of the chosen replicas. + * @return a list of {@link StorageType}s for deletion. + */ + public List chooseExcess(final short replication, + final Iterable chosen) { + final List types = chooseStorageTypes(replication); + final List excess = new LinkedList(); + diff(types, chosen, excess); + return excess; + } + + /** @return the fallback {@link StorageType} for creation. */ + public StorageType getCreationFallback(EnumSet unavailables) { + return getFallback(unavailables, creationFallbacks); + } + + /** @return the fallback {@link StorageType} for replication. 
*/ + public StorageType getReplicationFallback(EnumSet unavailables) { + return getFallback(unavailables, replicationFallbacks); + } + + @Override + public int hashCode() { + return Byte.valueOf(id).hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } else if (obj == null || !(obj instanceof BlockStoragePolicy)) { + return false; + } + final BlockStoragePolicy that = (BlockStoragePolicy)obj; + return this.id == that.id; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "{" + name + ":" + id + + ", storageTypes=" + Arrays.asList(storageTypes) + + ", creationFallbacks=" + Arrays.asList(creationFallbacks) + + ", replicationFallbacks=" + Arrays.asList(replicationFallbacks); + } + + public byte getId() { + return id; + } + + public String getName() { + return name; + } + + private static StorageType getFallback(EnumSet unavailables, + StorageType[] fallbacks) { + for(StorageType fb : fallbacks) { + if (!unavailables.contains(fb)) { + return fb; + } + } + return null; + } + + private static byte parseID(String idString, String element, Configuration conf) { + byte id = 0; + try { + id = Byte.parseByte(idString); + } catch(NumberFormatException nfe) { + throwIllegalArgumentException("Failed to parse policy ID \"" + idString + + "\" to a " + ID_BIT_LENGTH + "-bit integer", conf); + } + if (id < 0) { + throwIllegalArgumentException("Invalid policy ID: id = " + id + + " < 1 in \"" + element + "\"", conf); + } else if (id == 0) { + throw new IllegalArgumentException("Policy ID 0 is reserved: " + element); + } else if (id > ID_MAX) { + throwIllegalArgumentException("Invalid policy ID: id = " + id + + " > MAX = " + ID_MAX + " in \"" + element + "\"", conf); + } + return id; + } + + private static StorageType[] parseStorageTypes(String[] strings) { + if (strings == null || strings.length == 0) { + return StorageType.EMPTY_ARRAY; + } + final StorageType[] types = new 
StorageType[strings.length]; + for(int i = 0; i < types.length; i++) { + types[i] = StorageType.valueOf(strings[i].trim().toUpperCase()); + } + return types; + } + + private static StorageType[] readStorageTypes(byte id, String keyPrefix, + Configuration conf) { + final String key = keyPrefix + id; + final String[] values = conf.getStrings(key); + try { + return parseStorageTypes(values); + } catch(Exception e) { + throw new IllegalArgumentException("Failed to parse " + key + + " \"" + conf.get(key), e); + } + } + + private static BlockStoragePolicy readBlockStoragePolicy(byte id, String name, + Configuration conf) { + final StorageType[] storageTypes = readStorageTypes(id, + DFS_BLOCK_STORAGE_POLICY_KEY_PREFIX, conf); + if (storageTypes.length == 0) { + throw new IllegalArgumentException( + DFS_BLOCK_STORAGE_POLICY_KEY_PREFIX + id + " is missing or is empty."); + } + final StorageType[] creationFallbacks = readStorageTypes(id, + DFS_BLOCK_STORAGE_POLICY_CREATION_FALLBACK_KEY_PREFIX, conf); + final StorageType[] replicationFallbacks = readStorageTypes(id, + DFS_BLOCK_STORAGE_POLICY_REPLICATION_FALLBACK_KEY_PREFIX, conf); + return new BlockStoragePolicy(id, name, storageTypes, creationFallbacks, + replicationFallbacks); + } + + /** Read {@link Suite} from conf. */ + public static Suite readBlockStorageSuite(Configuration conf) { + final BlockStoragePolicy[] policies = new BlockStoragePolicy[1 << ID_BIT_LENGTH]; + final String[] values = conf.getStrings(DFS_BLOCK_STORAGE_POLICIES_KEY); + if (values == null) { + // conf property is missing, use default suite. 
+ return DEFAULT_SUITE; + } + byte firstID = -1; + for(String v : values) { + v = v.trim(); + final int i = v.indexOf(':'); + if (i < 0) { + throwIllegalArgumentException("Failed to parse element \"" + v + + "\" (expected format is NAME:ID)", conf); + } else if (i == 0) { + throwIllegalArgumentException("Policy name is missing in \"" + v + "\"", conf); + } else if (i == v.length() - 1) { + throwIllegalArgumentException("Policy ID is missing in \"" + v + "\"", conf); + } + final String name = v.substring(0, i).trim(); + for(int j = 1; j < policies.length; j++) { + if (policies[j] != null && policies[j].name.equals(name)) { + throwIllegalArgumentException("Policy name duplication: \"" + + name + "\" appears more than once", conf); + } + } + + final byte id = parseID(v.substring(i + 1).trim(), v, conf); + if (policies[id] != null) { + throwIllegalArgumentException("Policy duplication: ID " + id + + " appears more than once", conf); + } + policies[id] = readBlockStoragePolicy(id, name, conf); + String prefix = ""; + if (firstID == -1) { + firstID = id; + prefix = "(default) "; + } + LOG.info(prefix + policies[id]); + } + if (firstID == -1) { + throwIllegalArgumentException("Empty list is not allowed", conf); + } + return new Suite(firstID, policies); + } + + public static String buildXAttrName() { + return XAttrNS.toString().toLowerCase() + "." 
+ STORAGE_POLICY_XATTR_NAME; + } + + public static XAttr buildXAttr(byte policyId) { + final String name = buildXAttrName(); + return XAttrHelper.buildXAttr(name, new byte[] { policyId }); + } + + public static boolean isStoragePolicyXAttr(XAttr xattr) { + return xattr != null && xattr.getNameSpace() == BlockStoragePolicy.XAttrNS + && xattr.getName().equals(BlockStoragePolicy.STORAGE_POLICY_XATTR_NAME); + } + + private static void throwIllegalArgumentException(String message, + Configuration conf) { + throw new IllegalArgumentException(message + " in " + + DFS_BLOCK_STORAGE_POLICIES_KEY + " \"" + + conf.get(DFS_BLOCK_STORAGE_POLICIES_KEY) + "\"."); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 456fac634251d..6f94370183740 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -22,8 +22,6 @@ .EncryptedKeyVersion; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CRYPTO_CODEC_CLASSES_KEY_PREFIX; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_CRYPTO_CIPHER_SUITE_KEY; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; @@ -90,6 +88,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.SynchronousQueue; import 
java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -616,13 +615,15 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT); NameNodeProxies.ProxyAndInfo proxyInfo = null; + AtomicBoolean nnFallbackToSimpleAuth = new AtomicBoolean(false); if (numResponseToDrop > 0) { // This case is used for testing. LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY + " is set to " + numResponseToDrop + ", this hacked client will proactively drop responses"); proxyInfo = NameNodeProxies.createProxyWithLossyRetryHandler(conf, - nameNodeUri, ClientProtocol.class, numResponseToDrop); + nameNodeUri, ClientProtocol.class, numResponseToDrop, + nnFallbackToSimpleAuth); } if (proxyInfo != null) { @@ -637,7 +638,7 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, Preconditions.checkArgument(nameNodeUri != null, "null URI"); proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri, - ClientProtocol.class); + ClientProtocol.class, nnFallbackToSimpleAuth); this.dtService = proxyInfo.getDelegationTokenService(); this.namenode = proxyInfo.getProxy(); } @@ -675,10 +676,7 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, } this.saslClient = new SaslDataTransferClient( DataTransferSaslUtil.getSaslPropertiesResolver(conf), - TrustedChannelResolver.getInstance(conf), - conf.getBoolean( - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT)); + TrustedChannelResolver.getInstance(conf), nnFallbackToSimpleAuth); } /** @@ -1321,8 +1319,7 @@ private KeyVersion decryptEncryptedDataEncryptionKey(FileEncryptionInfo " an encrypted file"); } EncryptedKeyVersion ekv = EncryptedKeyVersion.createForDecryption( - //TODO: here we have to put the keyName to be provided by HDFS-6987 - null, feInfo.getEzKeyVersionName(), feInfo.getIV(), + 
feInfo.getKeyName(), feInfo.getEzKeyVersionName(), feInfo.getIV(), feInfo.getEncryptedDataEncryptionKey()); try { return provider.decryptEncryptedKey(ekv); @@ -1763,6 +1760,25 @@ public boolean setReplication(String src, short replication) } } + /** + * Set storage policy for an existing file/directory + * @param src file/directory name + * @param policyName name of the storage policy + */ + public void setStoragePolicy(String src, String policyName) + throws IOException { + try { + namenode.setStoragePolicy(src, policyName); + } catch (RemoteException e) { + throw e.unwrapRemoteException(AccessControlException.class, + FileNotFoundException.class, + SafeModeException.class, + NSQuotaExceededException.class, + UnresolvedPathException.class, + SnapshotAccessControlException.class); + } + } + /** * Rename file or directory. * @see ClientProtocol#rename(String, String) @@ -3089,4 +3105,18 @@ DFSHedgedReadMetrics getHedgedReadMetrics() { public KeyProviderCryptoExtension getKeyProvider() { return provider; } + + @VisibleForTesting + public void setKeyProvider(KeyProviderCryptoExtension provider) { + this.provider = provider; + } + + /** + * Returns the SaslDataTransferClient configured for this DFSClient. 
+ * + * @return SaslDataTransferClient configured for this DFSClient + */ + public SaslDataTransferClient getSaslDataTransferClient() { + return saslClient; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 8443c7176e99c..3c5358f7213c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -221,9 +221,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_KEY = "dfs.namenode.min.supported.datanode.version"; public static final String DFS_NAMENODE_MIN_SUPPORTED_DATANODE_VERSION_DEFAULT = "3.0.0-SNAPSHOT"; - public static final String DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK = "dfs.namenode.randomize-block-locations-per-block"; - public static final boolean DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT = false; - public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum"; public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1; @@ -372,6 +369,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_BALANCER_MOVERTHREADS_DEFAULT = 1000; public static final String DFS_BALANCER_DISPATCHERTHREADS_KEY = "dfs.balancer.dispatcherThreads"; public static final int DFS_BALANCER_DISPATCHERTHREADS_DEFAULT = 200; + + public static final String DFS_MOVER_MOVEDWINWIDTH_KEY = "dfs.mover.movedWinWidth"; + public static final long DFS_MOVER_MOVEDWINWIDTH_DEFAULT = 5400*1000L; + public static final String DFS_MOVER_MOVERTHREADS_KEY = "dfs.mover.moverThreads"; + public static final int DFS_MOVER_MOVERTHREADS_DEFAULT = 1000; + public static final String DFS_DATANODE_ADDRESS_KEY = "dfs.datanode.address"; public 
static final int DFS_DATANODE_DEFAULT_PORT = 50010; public static final String DFS_DATANODE_ADDRESS_DEFAULT = "0.0.0.0:" + DFS_DATANODE_DEFAULT_PORT; @@ -435,6 +438,15 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final Class DFS_BLOCK_REPLICATOR_CLASSNAME_DEFAULT = BlockPlacementPolicyDefault.class; public static final String DFS_REPLICATION_MAX_KEY = "dfs.replication.max"; public static final int DFS_REPLICATION_MAX_DEFAULT = 512; + public static final String DFS_BLOCK_STORAGE_POLICIES_KEY + = BlockStoragePolicy.DFS_BLOCK_STORAGE_POLICIES_KEY; + public static final String DFS_BLOCK_STORAGE_POLICY_KEY_PREFIX + = BlockStoragePolicy.DFS_BLOCK_STORAGE_POLICY_KEY_PREFIX; + public static final String DFS_BLOCK_STORAGE_POLICY_CREATION_FALLBACK_KEY_PREFIX + = BlockStoragePolicy.DFS_BLOCK_STORAGE_POLICY_CREATION_FALLBACK_KEY_PREFIX; + public static final String DFS_BLOCK_STORAGE_POLICY_REPLICATION_FALLBACK_KEY_PREFIX + = BlockStoragePolicy.DFS_BLOCK_STORAGE_POLICY_REPLICATION_FALLBACK_KEY_PREFIX; + public static final String DFS_DF_INTERVAL_KEY = "dfs.df.interval"; public static final int DFS_DF_INTERVAL_DEFAULT = 60000; public static final String DFS_BLOCKREPORT_INTERVAL_MSEC_KEY = "dfs.blockreport.intervalMsec"; @@ -577,9 +589,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATA_ENCRYPTION_ALGORITHM_KEY = "dfs.encrypt.data.transfer.algorithm"; public static final String DFS_TRUSTEDCHANNEL_RESOLVER_CLASS = "dfs.trustedchannel.resolver.class"; public static final String DFS_DATA_TRANSFER_PROTECTION_KEY = "dfs.data.transfer.protection"; + public static final String DFS_DATA_TRANSFER_PROTECTION_DEFAULT = ""; public static final String DFS_DATA_TRANSFER_SASL_PROPS_RESOLVER_CLASS_KEY = "dfs.data.transfer.saslproperties.resolver.class"; public static final int DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES_DEFAULT = 100; public static final String 
DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES = "dfs.namenode.list.encryption.zones.num.responses"; + public static final String DFS_ENCRYPTION_KEY_PROVIDER_URI = "dfs.encryption.key.provider.uri"; // Journal-node related configs. These are read on the JN side. public static final String DFS_JOURNALNODE_EDITS_DIR_KEY = "dfs.journalnode.edits.dir"; @@ -690,4 +704,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_NAMENODE_INOTIFY_MAX_EVENTS_PER_RPC_DEFAULT = 1000; + public static final String IGNORE_SECURE_PORTS_FOR_TESTING_KEY = + "ignore.secure.ports.for.testing"; + public static final boolean IGNORE_SECURE_PORTS_FOR_TESTING_DEFAULT = false; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index d368f4efca51f..94a1ddc030677 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -41,6 +41,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.fs.CanSetDropBehind; @@ -76,6 +77,7 @@ import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; +import org.apache.hadoop.hdfs.server.namenode.RetryStartFileException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; @@ -126,6 +128,13 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable, 
CanSetDropBehind { private static final int MAX_PACKETS = 80; // each packet 64K, total 5MB + /** + * Number of times to retry creating a file when there are transient + * errors (typically related to encryption zones and KeyProvider operations). + */ + @VisibleForTesting + public static final int CREATE_RETRY_COUNT = 10; + private final DFSClient dfsClient; private final long dfsclientSlowLogThresholdMs; private Socket s; @@ -1648,23 +1657,46 @@ static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src, short replication, long blockSize, Progressable progress, int buffersize, DataChecksum checksum, String[] favoredNodes, List cipherSuites) throws IOException { - final HdfsFileStatus stat; - try { - stat = dfsClient.namenode.create(src, masked, dfsClient.clientName, - new EnumSetWritable(flag), createParent, replication, - blockSize, cipherSuites); - } catch(RemoteException re) { - throw re.unwrapRemoteException(AccessControlException.class, - DSQuotaExceededException.class, - FileAlreadyExistsException.class, - FileNotFoundException.class, - ParentNotDirectoryException.class, - NSQuotaExceededException.class, - SafeModeException.class, - UnresolvedPathException.class, - SnapshotAccessControlException.class, - UnknownCipherSuiteException.class); + HdfsFileStatus stat = null; + + // Retry the create if we get a RetryStartFileException up to a maximum + // number of times + boolean shouldRetry = true; + int retryCount = CREATE_RETRY_COUNT; + while (shouldRetry) { + shouldRetry = false; + try { + stat = dfsClient.namenode.create(src, masked, dfsClient.clientName, + new EnumSetWritable(flag), createParent, replication, + blockSize, cipherSuites); + break; + } catch (RemoteException re) { + IOException e = re.unwrapRemoteException( + AccessControlException.class, + DSQuotaExceededException.class, + FileAlreadyExistsException.class, + FileNotFoundException.class, + ParentNotDirectoryException.class, + NSQuotaExceededException.class, + 
RetryStartFileException.class, + SafeModeException.class, + UnresolvedPathException.class, + SnapshotAccessControlException.class, + UnknownCipherSuiteException.class); + if (e instanceof RetryStartFileException) { + if (retryCount > 0) { + shouldRetry = true; + retryCount--; + } else { + throw new IOException("Too many retries because of encryption" + + " zone operations", e); + } + } else { + throw e; + } + } } + Preconditions.checkNotNull(stat, "HdfsFileStatus should not be null!"); final DFSOutputStream out = new DFSOutputStream(dfsClient, src, stat, flag, progress, checksum, favoredNodes); out.start(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 021890b98c4f5..aba86d1caa8c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -1794,34 +1794,37 @@ public static void assertAllResultsEqual(Collection objects) * Creates a new KeyProviderCryptoExtension by wrapping the * KeyProvider specified in the given Configuration. * - * @param conf Configuration specifying a single, non-transient KeyProvider. + * @param conf Configuration * @return new KeyProviderCryptoExtension, or null if no provider was found. 
* @throws IOException if the KeyProvider is improperly specified in * the Configuration */ public static KeyProviderCryptoExtension createKeyProviderCryptoExtension( final Configuration conf) throws IOException { - final List providers = KeyProviderFactory.getProviders(conf); - if (providers == null || providers.size() == 0) { + final String providerUriStr = + conf.get(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, null); + // No provider set in conf + if (providerUriStr == null) { return null; } - if (providers.size() > 1) { - StringBuilder builder = new StringBuilder(); - builder.append("Found multiple KeyProviders but only one is permitted ["); - String prefix = " "; - for (KeyProvider kp: providers) { - builder.append(prefix + kp.toString()); - prefix = ", "; - } - builder.append("]"); - throw new IOException(builder.toString()); - } - KeyProviderCryptoExtension provider = KeyProviderCryptoExtension - .createKeyProviderCryptoExtension(providers.get(0)); - if (provider.isTransient()) { - throw new IOException("KeyProvider " + provider.toString() + final URI providerUri; + try { + providerUri = new URI(providerUriStr); + } catch (URISyntaxException e) { + throw new IOException(e); + } + KeyProvider keyProvider = KeyProviderFactory.get(providerUri, conf); + if (keyProvider == null) { + throw new IOException("Could not instantiate KeyProvider from " + + DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI + " setting of '" + + providerUriStr +"'"); + } + if (keyProvider.isTransient()) { + throw new IOException("KeyProvider " + keyProvider.toString() + " was found but it is a transient provider."); } - return provider; + KeyProviderCryptoExtension cryptoProvider = KeyProviderCryptoExtension + .createKeyProviderCryptoExtension(keyProvider); + return cryptoProvider; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java 
index dbdf5c1874f46..6bce8b9f0684c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -471,7 +471,39 @@ public Boolean next(final FileSystem fs, final Path p) } }.resolve(this, absF); } - + + /** + * Set the source path to the specified storage policy. + * + * @param src The source path referring to either a directory or a file. + * @param policyName The name of the storage policy. + */ + public void setStoragePolicy(final Path src, final String policyName) + throws IOException { + statistics.incrementWriteOps(1); + Path absF = fixRelativePart(src); + new FileSystemLinkResolver() { + @Override + public Void doCall(final Path p) + throws IOException, UnresolvedLinkException { + dfs.setStoragePolicy(getPathName(p), policyName); + return null; + } + @Override + public Void next(final FileSystem fs, final Path p) + throws IOException { + if (fs instanceof DistributedFileSystem) { + ((DistributedFileSystem) fs).setStoragePolicy(p, policyName); + return null; + } else { + throw new UnsupportedOperationException( + "Cannot perform setStoragePolicy on a non-DistributedFileSystem: " + + src + " -> " + p); + } + } + }.resolve(this, absF); + } + /** * Move blocks from srcs to trg and delete srcs afterwards. * The file block sizes must be the same. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 90acedea12c86..f91f7094bb41a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -244,7 +244,7 @@ public static boolean useLogicalUri(Configuration conf, URI nameNodeUri) // Create the proxy provider. Actual proxy is not created. 
AbstractNNFailoverProxyProvider provider = NameNodeProxies .createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class, - false); + false, null); // No need to use logical URI since failover is not configured. if (provider == null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index 17653345ef956..fcc2f5fdb69b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -36,6 +36,7 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -145,13 +146,37 @@ public InetSocketAddress getAddress() { @SuppressWarnings("unchecked") public static ProxyAndInfo createProxy(Configuration conf, URI nameNodeUri, Class xface) throws IOException { + return createProxy(conf, nameNodeUri, xface, null); + } + + /** + * Creates the namenode proxy with the passed protocol. This will handle + * creation of either HA- or non-HA-enabled proxy objects, depending upon + * if the provided URI is a configured logical URI. + * + * @param conf the configuration containing the required IPC + * properties, client failover configurations, etc. + * @param nameNodeUri the URI pointing either to a specific NameNode + * or to a logical nameservice. 
+ * @param xface the IPC interface which should be created + * @param fallbackToSimpleAuth set to true or false during calls to indicate if + * a secure client falls back to simple auth + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException if there is an error creating the proxy + **/ + @SuppressWarnings("unchecked") + public static ProxyAndInfo createProxy(Configuration conf, + URI nameNodeUri, Class xface, AtomicBoolean fallbackToSimpleAuth) + throws IOException { AbstractNNFailoverProxyProvider failoverProxyProvider = - createFailoverProxyProvider(conf, nameNodeUri, xface, true); + createFailoverProxyProvider(conf, nameNodeUri, xface, true, + fallbackToSimpleAuth); if (failoverProxyProvider == null) { // Non-HA case return createNonHAProxy(conf, NameNode.getAddress(nameNodeUri), xface, - UserGroupInformation.getCurrentUser(), true); + UserGroupInformation.getCurrentUser(), true, fallbackToSimpleAuth); } else { // HA case Conf config = new Conf(conf); @@ -187,6 +212,8 @@ public static ProxyAndInfo createProxy(Configuration conf, * or to a logical nameservice. * @param xface the IPC interface which should be created * @param numResponseToDrop The number of responses to drop for each RPC call + * @param fallbackToSimpleAuth set to true or false during calls to indicate if + * a secure client falls back to simple auth * @return an object containing both the proxy and the associated * delegation token service it corresponds to. Will return null of the * given configuration does not support HA. 
@@ -195,10 +222,12 @@ public static ProxyAndInfo createProxy(Configuration conf, @SuppressWarnings("unchecked") public static ProxyAndInfo createProxyWithLossyRetryHandler( Configuration config, URI nameNodeUri, Class xface, - int numResponseToDrop) throws IOException { + int numResponseToDrop, AtomicBoolean fallbackToSimpleAuth) + throws IOException { Preconditions.checkArgument(numResponseToDrop > 0); AbstractNNFailoverProxyProvider failoverProxyProvider = - createFailoverProxyProvider(config, nameNodeUri, xface, true); + createFailoverProxyProvider(config, nameNodeUri, xface, true, + fallbackToSimpleAuth); if (failoverProxyProvider != null) { // HA case int delay = config.getInt( @@ -257,12 +286,35 @@ public static ProxyAndInfo createProxyWithLossyRetryHandler( public static ProxyAndInfo createNonHAProxy( Configuration conf, InetSocketAddress nnAddr, Class xface, UserGroupInformation ugi, boolean withRetries) throws IOException { + return createNonHAProxy(conf, nnAddr, xface, ugi, withRetries, null); + } + + /** + * Creates an explicitly non-HA-enabled proxy object. Most of the time you + * don't want to use this, and should instead use {@link NameNodeProxies#createProxy}. 
+ * + * @param conf the configuration object + * @param nnAddr address of the remote NN to connect to + * @param xface the IPC interface which should be created + * @param ugi the user who is making the calls on the proxy object + * @param withRetries certain interfaces have a non-standard retry policy + * @param fallbackToSimpleAuth - set to true or false during this method to + * indicate if a secure client falls back to simple auth + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static ProxyAndInfo createNonHAProxy( + Configuration conf, InetSocketAddress nnAddr, Class xface, + UserGroupInformation ugi, boolean withRetries, + AtomicBoolean fallbackToSimpleAuth) throws IOException { Text dtService = SecurityUtil.buildTokenService(nnAddr); T proxy; if (xface == ClientProtocol.class) { proxy = (T) createNNProxyWithClientProtocol(nnAddr, conf, ugi, - withRetries); + withRetries, fallbackToSimpleAuth); } else if (xface == JournalProtocol.class) { proxy = (T) createNNProxyWithJournalProtocol(nnAddr, conf, ugi); } else if (xface == NamenodeProtocol.class) { @@ -351,7 +403,8 @@ private static NamenodeProtocol createNNProxyWithNamenodeProtocol( private static ClientProtocol createNNProxyWithClientProtocol( InetSocketAddress address, Configuration conf, UserGroupInformation ugi, - boolean withRetries) throws IOException { + boolean withRetries, AtomicBoolean fallbackToSimpleAuth) + throws IOException { RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class); final RetryPolicy defaultPolicy = @@ -367,8 +420,8 @@ private static ClientProtocol createNNProxyWithClientProtocol( ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy( ClientNamenodeProtocolPB.class, version, address, ugi, conf, NetUtils.getDefaultSocketFactory(conf), - org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy) - .getProxy(); + 
org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy, + fallbackToSimpleAuth).getProxy(); if (withRetries) { // create the proxy with retries @@ -440,8 +493,8 @@ public static Class> getFailoverProxyProviderClass( /** Creates the Failover proxy provider instance*/ @VisibleForTesting public static AbstractNNFailoverProxyProvider createFailoverProxyProvider( - Configuration conf, URI nameNodeUri, Class xface, boolean checkPort) - throws IOException { + Configuration conf, URI nameNodeUri, Class xface, boolean checkPort, + AtomicBoolean fallbackToSimpleAuth) throws IOException { Class> failoverProxyProviderClass = null; AbstractNNFailoverProxyProvider providerNN; Preconditions.checkArgument( @@ -490,6 +543,7 @@ public static AbstractNNFailoverProxyProvider createFailoverProxyProvider + " and does not use port information."); } } + providerNN.setFallbackToSimpleAuth(fallbackToSimpleAuth); return providerNN; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/StorageType.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/StorageType.java index 3d8133c7ce52c..7ca9e00c7047b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/StorageType.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/StorageType.java @@ -32,9 +32,11 @@ @InterfaceStability.Unstable public enum StorageType { DISK, - SSD; + SSD, + ARCHIVE; public static final StorageType DEFAULT = DISK; + public static final StorageType[] EMPTY_ARRAY = {}; private static final StorageType[] VALUES = values(); @@ -42,4 +44,4 @@ public enum StorageType { public static List asList() { return Arrays.asList(VALUES); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index d8b7a790d3f94..4be83f2dd2573 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -259,6 +259,20 @@ public boolean setReplication(String src, short replication) FileNotFoundException, SafeModeException, UnresolvedLinkException, SnapshotAccessControlException, IOException; + /** + * Set the storage policy for a file/directory + * @param src Path of an existing file/directory. + * @param policyName The name of the storage policy + * @throws SnapshotAccessControlException If access is denied + * @throws UnresolvedLinkException if src contains a symlink + * @throws FileNotFoundException If file/dir src is not found + * @throws QuotaExceededException If changes violate the quota restriction + */ + @Idempotent + public void setStoragePolicy(String src, String policyName) + throws SnapshotAccessControlException, UnresolvedLinkException, + FileNotFoundException, QuotaExceededException, IOException; + /** * Set permissions for an existing file/directory. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZone.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZone.java index a20513a702c84..58e9ebad17321 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZone.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZone.java @@ -21,6 +21,7 @@ import org.apache.commons.lang.builder.HashCodeBuilder; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.crypto.CipherSuite; /** * A simple class for representing an encryption zone. 
Presently an encryption @@ -31,32 +32,40 @@ @InterfaceStability.Evolving public class EncryptionZone { + private final long id; private final String path; + private final CipherSuite suite; private final String keyName; - private final long id; - public EncryptionZone(String path, String keyName, long id) { + public EncryptionZone(long id, String path, + CipherSuite suite, String keyName) { + this.id = id; this.path = path; + this.suite = suite; this.keyName = keyName; - this.id = id; + } + + public long getId() { + return id; } public String getPath() { return path; } - public String getKeyName() { - return keyName; + public CipherSuite getSuite() { + return suite; } - public long getId() { - return id; + public String getKeyName() { + return keyName; } @Override public int hashCode() { return new HashCodeBuilder(13, 31). - append(path).append(keyName).append(id). + append(id).append(path). + append(suite).append(keyName). toHashCode(); } @@ -74,16 +83,18 @@ public boolean equals(Object obj) { EncryptionZone rhs = (EncryptionZone) obj; return new EqualsBuilder(). + append(id, rhs.id). append(path, rhs.path). + append(suite, rhs.suite). append(keyName, rhs.keyName). - append(id, rhs.id). 
isEquals(); } @Override public String toString() { - return "EncryptionZone [path=" + path + - ", keyName=" + keyName + - ", id=" + id + "]"; + return "EncryptionZone [id=" + id + + ", path=" + path + + ", suite=" + suite + + ", keyName=" + keyName + "]"; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsPermissionExtension.java similarity index 67% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java rename to hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsPermissionExtension.java index de2762df5e633..f74472d13cd13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsAclPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/FsPermissionExtension.java @@ -21,39 +21,46 @@ import org.apache.hadoop.fs.permission.FsPermission; /** - * HDFS permission subclass used to indicate an ACL is present. The ACL bit is - * not visible directly to users of {@link FsPermission} serialization. This is + * HDFS permission subclass used to indicate an ACL is present and/or that the + * underlying file/dir is encrypted. The ACL/encrypted bits are not visible + * directly to users of {@link FsPermission} serialization. This is * done for backwards compatibility in case any existing clients assume the * value of FsPermission is in a particular range. */ @InterfaceAudience.Private -public class FsAclPermission extends FsPermission { +public class FsPermissionExtension extends FsPermission { private final static short ACL_BIT = 1 << 12; + private final static short ENCRYPTED_BIT = 1 << 13; private final boolean aclBit; + private final boolean encryptedBit; /** - * Constructs a new FsAclPermission based on the given FsPermission. 
+ * Constructs a new FsPermissionExtension based on the given FsPermission. * * @param perm FsPermission containing permission bits */ - public FsAclPermission(FsPermission perm) { + public FsPermissionExtension(FsPermission perm, boolean hasAcl, + boolean isEncrypted) { super(perm.toShort()); - aclBit = true; + aclBit = hasAcl; + encryptedBit = isEncrypted; } /** - * Creates a new FsAclPermission by calling the base class constructor. + * Creates a new FsPermissionExtension by calling the base class constructor. * * @param perm short containing permission bits */ - public FsAclPermission(short perm) { + public FsPermissionExtension(short perm) { super(perm); aclBit = (perm & ACL_BIT) != 0; + encryptedBit = (perm & ENCRYPTED_BIT) != 0; } @Override public short toExtendedShort() { - return (short)(toShort() | (aclBit ? ACL_BIT : 0)); + return (short)(toShort() | + (aclBit ? ACL_BIT : 0) | (encryptedBit ? ENCRYPTED_BIT : 0)); } @Override @@ -61,6 +68,11 @@ public boolean getAclBit() { return aclBit; } + @Override + public boolean getEncryptedBit() { + return encryptedBit; + } + @Override public boolean equals(Object o) { // This intentionally delegates to the base class. 
This is only overridden diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index 240dcd01acbb6..b0c65c3a5f932 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -160,5 +160,8 @@ public static enum DatanodeReportType { = DFSUtil.string2Bytes(DOT_SNAPSHOT_DIR); public static final String SEPARATOR_DOT_SNAPSHOT_DIR - = Path.SEPARATOR + DOT_SNAPSHOT_DIR; + = Path.SEPARATOR + DOT_SNAPSHOT_DIR; + + public static final String SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR + = Path.SEPARATOR + DOT_SNAPSHOT_DIR + Path.SEPARATOR; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java index 3d056396e6631..94d9a92cfa20e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java @@ -50,6 +50,7 @@ public class HdfsFileStatus { // Used by dir, not including dot and dotdot. Always zero for a regular file. 
private final int childrenNum; + private final byte storagePolicy; public static final byte[] EMPTY_NAME = new byte[0]; @@ -71,7 +72,8 @@ public class HdfsFileStatus { public HdfsFileStatus(long length, boolean isdir, int block_replication, long blocksize, long modification_time, long access_time, FsPermission permission, String owner, String group, byte[] symlink, - byte[] path, long fileId, int childrenNum, FileEncryptionInfo feInfo) { + byte[] path, long fileId, int childrenNum, FileEncryptionInfo feInfo, + byte storagePolicy) { this.length = length; this.isdir = isdir; this.block_replication = (short)block_replication; @@ -90,6 +92,7 @@ public HdfsFileStatus(long length, boolean isdir, int block_replication, this.fileId = fileId; this.childrenNum = childrenNum; this.feInfo = feInfo; + this.storagePolicy = storagePolicy; } /** @@ -251,6 +254,11 @@ public final int getChildrenNum() { return childrenNum; } + /** @return the storage policy id */ + public final byte getStoragePolicy() { + return storagePolicy; + } + public final FileStatus makeQualified(URI defaultUri, Path path) { return new FileStatus(getLen(), isDir(), getReplication(), getBlockSize(), getModificationTime(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java index a78b8bc2a8881..7e602bfcdfd26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsLocatedFileStatus.java @@ -35,7 +35,7 @@ @InterfaceStability.Evolving public class HdfsLocatedFileStatus extends HdfsFileStatus { private final LocatedBlocks locations; - + /** * Constructor * @@ -58,13 +58,13 @@ public HdfsLocatedFileStatus(long length, boolean isdir, int block_replication, long blocksize, long modification_time, long 
access_time, FsPermission permission, String owner, String group, byte[] symlink, byte[] path, long fileId, LocatedBlocks locations, - int childrenNum, FileEncryptionInfo feInfo) { + int childrenNum, FileEncryptionInfo feInfo, byte storagePolicy) { super(length, isdir, block_replication, blocksize, modification_time, - access_time, permission, owner, group, symlink, path, fileId, - childrenNum, feInfo); + access_time, permission, owner, group, symlink, path, fileId, + childrenNum, feInfo, storagePolicy); this.locations = locations; } - + public LocatedBlocks getBlockLocations() { return locations; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshottableDirectoryStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshottableDirectoryStatus.java index d3952833d1bad..13acc7a76b68a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshottableDirectoryStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/SnapshottableDirectoryStatus.java @@ -24,6 +24,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSUtil; /** @@ -61,7 +62,7 @@ public SnapshottableDirectoryStatus(long modification_time, long access_time, int snapshotNumber, int snapshotQuota, byte[] parentFullPath) { this.dirStatus = new HdfsFileStatus(0, true, 0, 0, modification_time, access_time, permission, owner, group, null, localName, inodeId, - childrenNum, null); + childrenNum, null, BlockStoragePolicy.ID_UNSPECIFIED); this.snapshotNumber = snapshotNumber; this.snapshotQuota = snapshotQuota; this.parentFullPath = parentFullPath; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java index 643af4a9fb1a7..9df9929dfd4b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java @@ -28,6 +28,7 @@ import java.net.InetAddress; import java.net.Socket; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import javax.security.auth.callback.Callback; import javax.security.auth.callback.CallbackHandler; @@ -71,21 +72,38 @@ public class SaslDataTransferClient { private static final Logger LOG = LoggerFactory.getLogger( SaslDataTransferClient.class); - private final boolean fallbackToSimpleAuthAllowed; + private final AtomicBoolean fallbackToSimpleAuth; private final SaslPropertiesResolver saslPropsResolver; private final TrustedChannelResolver trustedChannelResolver; + /** + * Creates a new SaslDataTransferClient. This constructor is used in cases + * where it is not relevant to track if a secure client did a fallback to + * simple auth. For intra-cluster connections between data nodes in the same + * cluster, we can assume that all run under the same security configuration. + * + * @param saslPropsResolver for determining properties of SASL negotiation + * @param trustedChannelResolver for identifying trusted connections that do + * not require SASL negotiation + */ + public SaslDataTransferClient(SaslPropertiesResolver saslPropsResolver, + TrustedChannelResolver trustedChannelResolver) { + this(saslPropsResolver, trustedChannelResolver, null); + } + /** * Creates a new SaslDataTransferClient. 
* * @param saslPropsResolver for determining properties of SASL negotiation * @param trustedChannelResolver for identifying trusted connections that do * not require SASL negotiation + * @param fallbackToSimpleAuth checked on each attempt at general SASL + * handshake, if true forces use of simple auth */ public SaslDataTransferClient(SaslPropertiesResolver saslPropsResolver, TrustedChannelResolver trustedChannelResolver, - boolean fallbackToSimpleAuthAllowed) { - this.fallbackToSimpleAuthAllowed = fallbackToSimpleAuthAllowed; + AtomicBoolean fallbackToSimpleAuth) { + this.fallbackToSimpleAuth = fallbackToSimpleAuth; this.saslPropsResolver = saslPropsResolver; this.trustedChannelResolver = trustedChannelResolver; } @@ -221,22 +239,26 @@ private IOStreamPair send(InetAddress addr, OutputStream underlyingOut, "SASL client skipping handshake in secured configuration with " + "privileged port for addr = {}, datanodeId = {}", addr, datanodeId); return null; - } else if (accessToken.getIdentifier().length == 0) { - if (!fallbackToSimpleAuthAllowed) { - throw new IOException( - "No block access token was provided (insecure cluster), but this " + - "client is configured to allow only secure connections."); - } + } else if (fallbackToSimpleAuth != null && fallbackToSimpleAuth.get()) { LOG.debug( "SASL client skipping handshake in secured configuration with " + "unsecured cluster for addr = {}, datanodeId = {}", addr, datanodeId); return null; - } else { + } else if (saslPropsResolver != null) { LOG.debug( "SASL client doing general handshake for addr = {}, datanodeId = {}", addr, datanodeId); return getSaslStreams(addr, underlyingOut, underlyingIn, accessToken, datanodeId); + } else { + // It's a secured cluster using non-privileged ports, but no SASL. The + // only way this can happen is if the DataNode has + // ignore.secure.ports.for.testing configured, so this is a rare edge case. 
+ LOG.debug( + "SASL client skipping handshake in secured configuration with no SASL " + + "protection configured for addr = {}, datanodeId = {}", + addr, datanodeId); + return null; } } @@ -348,12 +370,6 @@ private IOStreamPair getSaslStreams(InetAddress addr, OutputStream underlyingOut, InputStream underlyingIn, Token accessToken, DatanodeID datanodeId) throws IOException { - if (saslPropsResolver == null) { - throw new IOException(String.format("Cannot create a secured " + - "connection if DataNode listens on unprivileged port (%d) and no " + - "protection is defined in configuration property %s.", - datanodeId.getXferPort(), DFS_DATA_TRANSFER_PROTECTION_KEY)); - } Map saslProps = saslPropsResolver.getClientProperties(addr); String userName = buildUserName(accessToken); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java index 78570579323b6..2b82c82f26a81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java @@ -112,11 +112,29 @@ public IOStreamPair receive(Peer peer, OutputStream underlyingOut, "SASL server skipping handshake in unsecured configuration for " + "peer = {}, datanodeId = {}", peer, datanodeId); return new IOStreamPair(underlyingIn, underlyingOut); - } else { + } else if (dnConf.getSaslPropsResolver() != null) { LOG.debug( "SASL server doing general handshake for peer = {}, datanodeId = {}", peer, datanodeId); return getSaslStreams(peer, underlyingOut, underlyingIn, datanodeId); + } else if (dnConf.getIgnoreSecurePortsForTesting()) { + // It's a secured cluster using non-privileged ports, but no SASL. 
The + // only way this can happen is if the DataNode has + // ignore.secure.ports.for.testing configured, so this is a rare edge case. + LOG.debug( + "SASL server skipping handshake in secured configuration with no SASL " + + "protection configured for peer = {}, datanodeId = {}", + peer, datanodeId); + return new IOStreamPair(underlyingIn, underlyingOut); + } else { + // The error message here intentionally does not mention + // ignore.secure.ports.for.testing. That's intended for dev use only. + // This code path is not expected to execute ever, because DataNode startup + // checks for invalid configuration and aborts. + throw new IOException(String.format("Cannot create a secured " + + "connection if DataNode listens on unprivileged port (%d) and no " + + "protection is defined in configuration property %s.", + datanodeId.getXferPort(), DFS_DATA_TRANSFER_PROTECTION_KEY)); } } @@ -257,12 +275,6 @@ private byte[] getEncryptionKeyFromUserName(String userName) private IOStreamPair getSaslStreams(Peer peer, OutputStream underlyingOut, InputStream underlyingIn, final DatanodeID datanodeId) throws IOException { SaslPropertiesResolver saslPropsResolver = dnConf.getSaslPropsResolver(); - if (saslPropsResolver == null) { - throw new IOException(String.format("Cannot create a secured " + - "connection if DataNode listens on unprivileged port (%d) and no " + - "protection is defined in configuration property %s.", - datanodeId.getXferPort(), DFS_DATA_TRANSFER_PROTECTION_KEY)); - } Map saslProps = saslPropsResolver.getServerProperties( getPeerAddress(peer)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 5af349ee08e87..9d0d13cff3455 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -173,6 +173,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetReplicationResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetSafeModeRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetSafeModeResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetStoragePolicyRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetStoragePolicyResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetTimesRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetTimesResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdateBlockForPipelineRequestProto; @@ -236,6 +238,8 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements static final GetSnapshottableDirListingResponseProto NULL_GET_SNAPSHOTTABLE_DIR_LISTING_RESPONSE = GetSnapshottableDirListingResponseProto.newBuilder().build(); + static final SetStoragePolicyResponseProto VOID_SET_STORAGE_POLICY_RESPONSE = + SetStoragePolicyResponseProto.newBuilder().build(); private static final CreateResponseProto VOID_CREATE_RESPONSE = CreateResponseProto.newBuilder().build(); @@ -1413,6 +1417,18 @@ public CheckAccessResponseProto checkAccess(RpcController controller, return VOID_CHECKACCESS_RESPONSE; } + @Override + public SetStoragePolicyResponseProto setStoragePolicy( + RpcController controller, SetStoragePolicyRequestProto request) + throws ServiceException { + try { + server.setStoragePolicy(request.getSrc(), request.getPolicyName()); + } catch (IOException e) { + throw 
new ServiceException(e); + } + return VOID_SET_STORAGE_POLICY_RESPONSE; + } + public GetCurrentEditLogTxidResponseProto getCurrentEditLogTxid(RpcController controller, GetCurrentEditLogTxidRequestProto req) throws ServiceException { try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 17e443920c45b..1279f7c24e42f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -64,7 +64,9 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; +import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; +import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; import org.apache.hadoop.hdfs.protocol.proto.AclProtos.GetAclStatusRequestProto; @@ -152,6 +154,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdateBlockForPipelineRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CheckAccessRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetStoragePolicyRequestProto; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos; import org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.CreateEncryptionZoneRequestProto; import 
org.apache.hadoop.hdfs.protocol.proto.EncryptionZonesProtos.GetEZForPathRequestProto; @@ -1435,6 +1438,19 @@ public void checkAccess(String path, FsAction mode) throws IOException { } } + @Override + public void setStoragePolicy(String src, String policyName) + throws SnapshotAccessControlException, UnresolvedLinkException, + FileNotFoundException, QuotaExceededException, IOException { + SetStoragePolicyRequestProto req = SetStoragePolicyRequestProto + .newBuilder().setSrc(src).setPolicyName(policyName).build(); + try { + rpcProxy.setStoragePolicy(null, req); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + public long getCurrentEditLogTxid() throws IOException { GetCurrentEditLogTxidRequestProto req = GetCurrentEditLogTxidRequestProto .getDefaultInstance(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 64a6cfa23cb60..c0b71ebffeec7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.proto.HAServiceProtocolProtos; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.inotify.Event; @@ -66,7 +67,7 @@ import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.fs.FileEncryptionInfo; -import org.apache.hadoop.hdfs.protocol.FsAclPermission; +import org.apache.hadoop.hdfs.protocol.FsPermissionExtension; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import 
org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; @@ -893,9 +894,25 @@ public static BlockCommandProto convert(BlockCommand cmd) { } builder.addAllTargets(convert(cmd.getTargets())) .addAllTargetStorageUuids(convert(cmd.getTargetStorageIDs())); + StorageType[][] types = cmd.getTargetStorageTypes(); + if (types != null) { + builder.addAllTargetStorageTypes(convert(types)); + } return builder.build(); } - + + private static List convert(StorageType[][] types) { + List list = Lists.newArrayList(); + if (types != null) { + for (StorageType[] ts : types) { + StorageTypesProto.Builder builder = StorageTypesProto.newBuilder(); + builder.addAllStorageTypes(convertStorageTypes(ts)); + list.add(builder.build()); + } + } + return list; + } + public static BlockIdCommandProto convert(BlockIdCommand cmd) { BlockIdCommandProto.Builder builder = BlockIdCommandProto.newBuilder() .setBlockPoolId(cmd.getBlockPoolId()); @@ -1024,7 +1041,7 @@ public static BlockCommand convert(BlockCommandProto blkCmd) { } else { for(int i = 0; i < targetStorageTypes.length; i++) { List p = targetStorageTypesList.get(i).getStorageTypesList(); - targetStorageTypes[i] = p.toArray(new StorageType[p.size()]); + targetStorageTypes[i] = convertStorageTypes(p, targets[i].length); } } @@ -1264,7 +1281,7 @@ public static FsPermissionProto convert(FsPermission p) { } public static FsPermission convert(FsPermissionProto p) { - return new FsAclPermission((short)p.getPerm()); + return new FsPermissionExtension((short)p.getPerm()); } @@ -1330,8 +1347,9 @@ public static HdfsFileStatus convert(HdfsFileStatusProto fs) { fs.hasFileId()? fs.getFileId(): INodeId.GRANDFATHER_INODE_ID, fs.hasLocations() ? PBHelper.convert(fs.getLocations()) : null, fs.hasChildrenNum() ? fs.getChildrenNum() : -1, - fs.hasFileEncryptionInfo() ? convert(fs.getFileEncryptionInfo()) : - null); + fs.hasFileEncryptionInfo() ? 
convert(fs.getFileEncryptionInfo()) : null, + fs.hasStoragePolicy() ? (byte) fs.getStoragePolicy() + : BlockStoragePolicy.ID_UNSPECIFIED); } public static SnapshottableDirectoryStatus convert( @@ -1377,7 +1395,8 @@ public static HdfsFileStatusProto convert(HdfsFileStatus fs) { setGroup(fs.getGroup()). setFileId(fs.getFileId()). setChildrenNum(fs.getChildrenNum()). - setPath(ByteString.copyFrom(fs.getLocalNameInBytes())); + setPath(ByteString.copyFrom(fs.getLocalNameInBytes())). + setStoragePolicy(fs.getStoragePolicy()); if (fs.isSymlink()) { builder.setSymlink(ByteString.copyFrom(fs.getSymlinkInBytes())); } @@ -1385,7 +1404,8 @@ public static HdfsFileStatusProto convert(HdfsFileStatus fs) { builder.setFileEncryptionInfo(convert(fs.getFileEncryptionInfo())); } if (fs instanceof HdfsLocatedFileStatus) { - LocatedBlocks locations = ((HdfsLocatedFileStatus)fs).getBlockLocations(); + final HdfsLocatedFileStatus lfs = (HdfsLocatedFileStatus) fs; + LocatedBlocks locations = lfs.getBlockLocations(); if (locations != null) { builder.setLocations(PBHelper.convert(locations)); } @@ -1698,6 +1718,8 @@ public static StorageTypeProto convertStorageType(StorageType type) { return StorageTypeProto.DISK; case SSD: return StorageTypeProto.SSD; + case ARCHIVE: + return StorageTypeProto.ARCHIVE; default: throw new IllegalStateException( "BUG: StorageType not found, type=" + type); @@ -1726,6 +1748,8 @@ public static StorageType convertStorageType(StorageTypeProto type) { return StorageType.DISK; case SSD: return StorageType.SSD; + case ARCHIVE: + return StorageType.ARCHIVE; default: throw new IllegalStateException( "BUG: StorageTypeProto not found, type=" + type); @@ -2307,12 +2331,14 @@ public static EncryptionZoneProto convert(EncryptionZone zone) { return EncryptionZoneProto.newBuilder() .setId(zone.getId()) .setKeyName(zone.getKeyName()) - .setPath(zone.getPath()).build(); + .setPath(zone.getPath()) + .setSuite(convert(zone.getSuite())) + .build(); } public static EncryptionZone 
convert(EncryptionZoneProto proto) { - return new EncryptionZone(proto.getPath(), proto.getKeyName(), - proto.getId()); + return new EncryptionZone(proto.getId(), proto.getPath(), + convert(proto.getSuite()), proto.getKeyName()); } public static ShortCircuitShmSlotProto convert(SlotId slotId) { @@ -2637,6 +2663,30 @@ public static HdfsProtos.FileEncryptionInfoProto convert( .setKey(getByteString(info.getEncryptedDataEncryptionKey())) .setIv(getByteString(info.getIV())) .setEzKeyVersionName(info.getEzKeyVersionName()) + .setKeyName(info.getKeyName()) + .build(); + } + + public static HdfsProtos.PerFileEncryptionInfoProto convertPerFileEncInfo( + FileEncryptionInfo info) { + if (info == null) { + return null; + } + return HdfsProtos.PerFileEncryptionInfoProto.newBuilder() + .setKey(getByteString(info.getEncryptedDataEncryptionKey())) + .setIv(getByteString(info.getIV())) + .setEzKeyVersionName(info.getEzKeyVersionName()) + .build(); + } + + public static HdfsProtos.ZoneEncryptionInfoProto convert( + CipherSuite suite, String keyName) { + if (suite == null || keyName == null) { + return null; + } + return HdfsProtos.ZoneEncryptionInfoProto.newBuilder() + .setSuite(convert(suite)) + .setKeyName(keyName) .build(); } @@ -2649,7 +2699,20 @@ public static FileEncryptionInfo convert( byte[] key = proto.getKey().toByteArray(); byte[] iv = proto.getIv().toByteArray(); String ezKeyVersionName = proto.getEzKeyVersionName(); - return new FileEncryptionInfo(suite, key, iv, ezKeyVersionName); + String keyName = proto.getKeyName(); + return new FileEncryptionInfo(suite, key, iv, keyName, ezKeyVersionName); + } + + public static FileEncryptionInfo convert( + HdfsProtos.PerFileEncryptionInfoProto fileProto, + CipherSuite suite, String keyName) { + if (fileProto == null || suite == null || keyName == null) { + return null; + } + byte[] key = fileProto.getKey().toByteArray(); + byte[] iv = fileProto.getIv().toByteArray(); + String ezKeyVersionName = fileProto.getEzKeyVersionName(); + 
return new FileEncryptionInfo(suite, key, iv, keyName, ezKeyVersionName); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index 79e2647d1ed0e..67994c899d146 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -23,7 +23,6 @@ import java.io.PrintStream; import java.net.URI; import java.text.DateFormat; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -54,6 +53,7 @@ import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Tool; @@ -270,7 +270,7 @@ private long init(List reports) { // over-utilized, above-average, below-average and under-utilized. 
long overLoadedBytes = 0L, underLoadedBytes = 0L; for(DatanodeStorageReport r : reports) { - final DDatanode dn = dispatcher.newDatanode(r); + final DDatanode dn = dispatcher.newDatanode(r.getDatanodeInfo()); for(StorageType t : StorageType.asList()) { final Double utilization = policy.getUtilization(r, t); if (utilization == null) { // datanode does not have such storage type @@ -294,7 +294,7 @@ private long init(List reports) { } g = s; } else { - g = dn.addStorageGroup(t, maxSize2Move); + g = dn.addTarget(t, maxSize2Move); if (thresholdDiff <= 0) { // within threshold belowAvgUtilized.add(g); } else { @@ -548,15 +548,10 @@ static int run(Collection namenodes, final Parameters p, final Formatter formatter = new Formatter(System.out); System.out.println("Time Stamp Iteration# Bytes Already Moved Bytes Left To Move Bytes Being Moved"); - final List connectors - = new ArrayList(namenodes.size()); + List connectors = Collections.emptyList(); try { - for (URI uri : namenodes) { - final NameNodeConnector nnc = new NameNodeConnector( - Balancer.class.getSimpleName(), uri, BALANCER_ID_PATH, conf); - nnc.getKeyManager().startBlockKeyUpdater(); - connectors.add(nnc); - } + connectors = NameNodeConnector.newNameNodeConnectors(namenodes, + Balancer.class.getSimpleName(), BALANCER_ID_PATH, conf); boolean done = false; for(int iteration = 0; !done; iteration++) { @@ -581,7 +576,7 @@ static int run(Collection namenodes, final Parameters p, } } finally { for(NameNodeConnector nnc : connectors) { - nnc.close(); + IOUtils.cleanup(LOG, nnc); } } return ExitStatus.SUCCESS.getExitCode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index 14d598936ca12..cea1ab71150ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -43,12 +43,13 @@ import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicLong; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -86,7 +87,11 @@ public class Dispatcher { private static final long MAX_BLOCKS_SIZE_TO_FETCH = 2 * GB; private static final int MAX_NO_PENDING_MOVE_ITERATIONS = 5; - private static final long DELAY_AFTER_ERROR = 10 * 1000L; // 10 seconds + /** + * the period of time to delay the usage of a DataNode after hitting + * errors when using it for migrating data + */ + private static long delayAfterErrors = 10 * 1000; private final NameNodeConnector nnc; private final SaslDataTransferClient saslClient; @@ -103,12 +108,14 @@ public class Dispatcher { private final MovedBlocks movedBlocks; /** Map (datanodeUuid,storageType -> StorageGroup) */ - private final StorageGroupMap storageGroupMap = new StorageGroupMap(); + private final StorageGroupMap storageGroupMap + = new StorageGroupMap(); private NetworkTopology cluster; private final ExecutorService moveExecutor; private final ExecutorService dispatchExecutor; + /** The maximum number of concurrent blocks moves at a datanode */ private final int maxConcurrentMovesPerNode; @@ -140,18 +147,18 @@ private void removeAllButRetain(MovedBlocks movedBlocks) { } } - static class StorageGroupMap { + public static class StorageGroupMap { private static String toKey(String datanodeUuid, StorageType 
storageType) { return datanodeUuid + ":" + storageType; } - private final Map map = new HashMap(); + private final Map map = new HashMap(); - StorageGroup get(String datanodeUuid, StorageType storageType) { + public G get(String datanodeUuid, StorageType storageType) { return map.get(toKey(datanodeUuid, storageType)); } - void put(StorageGroup g) { + public void put(G g) { final String key = toKey(g.getDatanodeInfo().getDatanodeUuid(), g.storageType); final StorageGroup existing = map.put(key, g); Preconditions.checkState(existing == null); @@ -164,24 +171,32 @@ int size() { void clear() { map.clear(); } + + public Collection values() { + return map.values(); + } } /** This class keeps track of a scheduled block move */ - private class PendingMove { + public class PendingMove { private DBlock block; private Source source; private DDatanode proxySource; private StorageGroup target; - private PendingMove() { + private PendingMove(Source source, StorageGroup target) { + this.source = source; + this.target = target; } @Override public String toString() { - final Block b = block.getBlock(); - return b + " with size=" + b.getNumBytes() + " from " - + source.getDisplayName() + " to " + target.getDisplayName() - + " through " + proxySource.datanode; + final Block b = block != null ? block.getBlock() : null; + String bStr = b != null ? (b + " with size=" + b.getNumBytes() + " ") + : " "; + return bStr + "from " + source.getDisplayName() + " to " + target + .getDisplayName() + " through " + (proxySource != null ? 
proxySource + .datanode : ""); } /** @@ -191,9 +206,11 @@ public String toString() { * @return true if a block and its proxy are chosen; false otherwise */ private boolean chooseBlockAndProxy() { + // source and target must have the same storage type + final StorageType t = source.getStorageType(); // iterate all source's blocks until find a good one for (Iterator i = source.getBlockIterator(); i.hasNext();) { - if (markMovedIfGoodBlock(i.next())) { + if (markMovedIfGoodBlock(i.next(), t)) { i.remove(); return true; } @@ -204,10 +221,10 @@ private boolean chooseBlockAndProxy() { /** * @return true if the given block is good for the tentative move. */ - private boolean markMovedIfGoodBlock(DBlock block) { + private boolean markMovedIfGoodBlock(DBlock block, StorageType targetStorageType) { synchronized (block) { synchronized (movedBlocks) { - if (isGoodBlockCandidate(source, target, block)) { + if (isGoodBlockCandidate(source, target, targetStorageType, block)) { this.block = block; if (chooseProxySource()) { movedBlocks.put(block); @@ -300,12 +317,13 @@ private void dispatch() { LOG.info("Successfully moved " + this); } catch (IOException e) { LOG.warn("Failed to move " + this + ": " + e.getMessage()); + target.getDDatanode().setHasFailure(); // Proxy or target may have some issues, delay before using these nodes // further in order to avoid a potential storm of "threads quota // exceeded" warnings when the dispatcher gets out of sync with work // going on in datanodes. - proxySource.activateDelay(DELAY_AFTER_ERROR); - target.getDDatanode().activateDelay(DELAY_AFTER_ERROR); + proxySource.activateDelay(delayAfterErrors); + target.getDDatanode().activateDelay(delayAfterErrors); } finally { IOUtils.closeStream(out); IOUtils.closeStream(in); @@ -356,10 +374,23 @@ private void reset() { } /** A class for keeping track of block locations in the dispatcher. 
*/ - private static class DBlock extends MovedBlocks.Locations { - DBlock(Block block) { + public static class DBlock extends MovedBlocks.Locations { + public DBlock(Block block) { super(block); } + + @Override + public synchronized boolean isLocatedOn(StorageGroup loc) { + // currently we only check if replicas are located on the same DataNodes + // since we do not have the capability to store two replicas in the same + // DataNode even though they are on two different storage types + for (StorageGroup existing : locations) { + if (existing.getDatanodeInfo().equals(loc.getDatanodeInfo())) { + return true; + } + } + return false; + } } /** The class represents a desired move. */ @@ -378,10 +409,10 @@ long getSize() { } /** A class that keeps track of a datanode. */ - static class DDatanode { + public static class DDatanode { /** A group of storages in a datanode with the same storage type. */ - class StorageGroup { + public class StorageGroup { final StorageType storageType; final long maxSize2Move; private long scheduledSize = 0L; @@ -390,18 +421,26 @@ private StorageGroup(StorageType storageType, long maxSize2Move) { this.storageType = storageType; this.maxSize2Move = maxSize2Move; } + + public StorageType getStorageType() { + return storageType; + } private DDatanode getDDatanode() { return DDatanode.this; } - DatanodeInfo getDatanodeInfo() { + public DatanodeInfo getDatanodeInfo() { return DDatanode.this.datanode; } /** Decide if still need to move more bytes */ - synchronized boolean hasSpaceForScheduling() { - return availableSizeToMove() > 0L; + boolean hasSpaceForScheduling() { + return hasSpaceForScheduling(0L); + } + + synchronized boolean hasSpaceForScheduling(long size) { + return availableSizeToMove() > size; } /** @return the total number of bytes that need to be moved */ @@ -410,7 +449,7 @@ synchronized long availableSizeToMove() { } /** increment scheduled size */ - synchronized void incScheduledSize(long size) { + public synchronized void 
incScheduledSize(long size) { scheduledSize += size; } @@ -424,6 +463,18 @@ synchronized void resetScheduledSize() { scheduledSize = 0L; } + private PendingMove addPendingMove(DBlock block, final PendingMove pm) { + if (getDDatanode().addPendingBlock(pm)) { + if (pm.markMovedIfGoodBlock(block, getStorageType())) { + incScheduledSize(pm.block.getNumBytes()); + return pm; + } else { + getDDatanode().removePendingBlock(pm); + } + } + return null; + } + /** @return the name for display */ String getDisplayName() { return datanode + ":" + storageType; @@ -436,38 +487,46 @@ public String toString() { } final DatanodeInfo datanode; - final EnumMap storageMap + private final EnumMap sourceMap + = new EnumMap(StorageType.class); + private final EnumMap targetMap = new EnumMap(StorageType.class); protected long delayUntil = 0L; /** blocks being moved but not confirmed yet */ private final List pendings; + private volatile boolean hasFailure = false; private final int maxConcurrentMoves; @Override public String toString() { - return getClass().getSimpleName() + ":" + datanode + ":" + storageMap.values(); + return getClass().getSimpleName() + ":" + datanode; } - private DDatanode(DatanodeStorageReport r, int maxConcurrentMoves) { - this.datanode = r.getDatanodeInfo(); + private DDatanode(DatanodeInfo datanode, int maxConcurrentMoves) { + this.datanode = datanode; this.maxConcurrentMoves = maxConcurrentMoves; this.pendings = new ArrayList(maxConcurrentMoves); } - private void put(StorageType storageType, StorageGroup g) { - final StorageGroup existing = storageMap.put(storageType, g); + public DatanodeInfo getDatanodeInfo() { + return datanode; + } + + private static void put(StorageType storageType, + G g, EnumMap map) { + final StorageGroup existing = map.put(storageType, g); Preconditions.checkState(existing == null); } - StorageGroup addStorageGroup(StorageType storageType, long maxSize2Move) { + public StorageGroup addTarget(StorageType storageType, long maxSize2Move) { 
final StorageGroup g = new StorageGroup(storageType, maxSize2Move); - put(storageType, g); + put(storageType, g, targetMap); return g; } - Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) { + public Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) { final Source s = d.new Source(storageType, maxSize2Move, this); - put(storageType, s); + put(storageType, s, sourceMap); return s; } @@ -505,10 +564,14 @@ synchronized boolean addPendingBlock(PendingMove pendingBlock) { synchronized boolean removePendingBlock(PendingMove pendingBlock) { return pendings.remove(pendingBlock); } + + void setHasFailure() { + this.hasFailure = true; + } } /** A node that can be the sources of a block move */ - class Source extends DDatanode.StorageGroup { + public class Source extends DDatanode.StorageGroup { private final List tasks = new ArrayList(2); private long blocksToReceive = 0L; @@ -576,8 +639,11 @@ private long getBlockList() throws IOException { /** Decide if the given block is a good candidate to move or not */ private boolean isGoodBlockCandidate(DBlock block) { + // source and target must have the same storage type + final StorageType sourceStorageType = getStorageType(); for (Task t : tasks) { - if (Dispatcher.this.isGoodBlockCandidate(this, t.target, block)) { + if (Dispatcher.this.isGoodBlockCandidate(this, t.target, + sourceStorageType, block)) { return true; } } @@ -597,11 +663,9 @@ private PendingMove chooseNextMove() { for (Iterator i = tasks.iterator(); i.hasNext();) { final Task task = i.next(); final DDatanode target = task.target.getDDatanode(); - PendingMove pendingBlock = new PendingMove(); + final PendingMove pendingBlock = new PendingMove(this, task.target); if (target.addPendingBlock(pendingBlock)) { // target is not busy, so do a tentative block allocation - pendingBlock.source = this; - pendingBlock.target = task.target; if (pendingBlock.chooseBlockAndProxy()) { long blockSize = 
pendingBlock.block.getNumBytes(); incScheduledSize(-blockSize); @@ -618,6 +682,11 @@ private PendingMove chooseNextMove() { } return null; } + + /** Add a pending move */ + public PendingMove addPendingMove(DBlock block, StorageGroup target) { + return target.addPendingMove(block, new PendingMove(this, target)); + } /** Iterate all source's blocks to remove moved ones */ private void removeMovedBlocks() { @@ -656,13 +725,7 @@ private void dispatchBlocks() { if (p != null) { // Reset no pending move counter noPendingMoveIteration=0; - // move the block - moveExecutor.execute(new Runnable() { - @Override - public void run() { - p.dispatch(); - } - }); + executePendingMove(p); continue; } @@ -718,22 +781,24 @@ public Dispatcher(NameNodeConnector nnc, Set includedNodes, this.cluster = NetworkTopology.getInstance(conf); this.moveExecutor = Executors.newFixedThreadPool(moverThreads); - this.dispatchExecutor = Executors.newFixedThreadPool(dispatcherThreads); + this.dispatchExecutor = dispatcherThreads == 0? 
null + : Executors.newFixedThreadPool(dispatcherThreads); this.maxConcurrentMovesPerNode = maxConcurrentMovesPerNode; - final boolean fallbackToSimpleAuthAllowed = conf.getBoolean( - CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, - CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT); this.saslClient = new SaslDataTransferClient( DataTransferSaslUtil.getSaslPropertiesResolver(conf), - TrustedChannelResolver.getInstance(conf), fallbackToSimpleAuthAllowed); + TrustedChannelResolver.getInstance(conf), nnc.fallbackToSimpleAuth); } - StorageGroupMap getStorageGroupMap() { + public DistributedFileSystem getDistributedFileSystem() { + return nnc.getDistributedFileSystem(); + } + + public StorageGroupMap getStorageGroupMap() { return storageGroupMap; } - NetworkTopology getCluster() { + public NetworkTopology getCluster() { return cluster; } @@ -781,7 +846,7 @@ private boolean shouldIgnore(DatanodeInfo dn) { } /** Get live datanode storage reports and then build the network topology. 
*/ - List init() throws IOException { + public List init() throws IOException { final DatanodeStorageReport[] reports = nnc.getLiveDatanodeStorageReport(); final List trimmed = new ArrayList(); // create network topology and classify utilization collections: @@ -797,8 +862,18 @@ List init() throws IOException { return trimmed; } - public DDatanode newDatanode(DatanodeStorageReport r) { - return new DDatanode(r, maxConcurrentMovesPerNode); + public DDatanode newDatanode(DatanodeInfo datanode) { + return new DDatanode(datanode, maxConcurrentMovesPerNode); + } + + public void executePendingMove(final PendingMove p) { + // move the block + moveExecutor.execute(new Runnable() { + @Override + public void run() { + p.dispatch(); + } + }); } public boolean dispatchAndCheckContinue() throws InterruptedException { @@ -838,7 +913,7 @@ public void run() { } // wait for all block moving to be done - waitForMoveCompletion(); + waitForMoveCompletion(targets); return bytesMoved.get() - bytesLastMoved; } @@ -846,23 +921,25 @@ public void run() { /** The sleeping period before checking if block move is completed again */ static private long blockMoveWaitTime = 30000L; - /** set the sleeping period for block move completion check */ - static void setBlockMoveWaitTime(long time) { - blockMoveWaitTime = time; - } - - /** Wait for all block move confirmations. */ - private void waitForMoveCompletion() { + /** + * Wait for all block move confirmations. 
+ * @return true if there is failed move execution + */ + public static boolean waitForMoveCompletion( + Iterable targets) { + boolean hasFailure = false; for(;;) { boolean empty = true; for (StorageGroup t : targets) { if (!t.getDDatanode().isPendingQEmpty()) { empty = false; break; + } else { + hasFailure |= t.getDDatanode().hasFailure; } } if (empty) { - return; //all pending queues are empty + return hasFailure; // all pending queues are empty } try { Thread.sleep(blockMoveWaitTime); @@ -873,14 +950,14 @@ private void waitForMoveCompletion() { /** * Decide if the block is a good candidate to be moved from source to target. - * A block is a good candidate if + * A block is a good candidate if * 1. the block is not in the process of being moved/has not been moved; * 2. the block does not have a replica on the target; * 3. doing the move does not reduce the number of racks that the block has */ - private boolean isGoodBlockCandidate(Source source, StorageGroup target, - DBlock block) { - if (source.storageType != target.storageType) { + private boolean isGoodBlockCandidate(StorageGroup source, StorageGroup target, + StorageType targetStorageType, DBlock block) { + if (target.storageType != targetStorageType) { return false; } // check if the block is moved or not @@ -891,7 +968,7 @@ private boolean isGoodBlockCandidate(Source source, StorageGroup target, return false; } if (cluster.isNodeGroupAware() - && isOnSameNodeGroupWithReplicas(target, block, source)) { + && isOnSameNodeGroupWithReplicas(source, target, block)) { return false; } if (reduceNumOfRacks(source, target, block)) { @@ -904,7 +981,7 @@ && isOnSameNodeGroupWithReplicas(target, block, source)) { * Determine whether moving the given block replica from source to target * would reduce the number of racks of the block replicas. 
*/ - private boolean reduceNumOfRacks(Source source, StorageGroup target, + private boolean reduceNumOfRacks(StorageGroup source, StorageGroup target, DBlock block) { final DatanodeInfo sourceDn = source.getDatanodeInfo(); if (cluster.isOnSameRack(sourceDn, target.getDatanodeInfo())) { @@ -937,12 +1014,12 @@ private boolean reduceNumOfRacks(Source source, StorageGroup target, * Check if there are any replica (other than source) on the same node group * with target. If true, then target is not a good candidate for placing * specific replica as we don't want 2 replicas under the same nodegroup. - * + * * @return true if there are any replica (other than source) on the same node * group with target */ - private boolean isOnSameNodeGroupWithReplicas( - StorageGroup target, DBlock block, Source source) { + private boolean isOnSameNodeGroupWithReplicas(StorageGroup source, + StorageGroup target, DBlock block) { final DatanodeInfo targetDn = target.getDatanodeInfo(); for (StorageGroup g : block.getLocations()) { if (g != source && cluster.isOnSameNodeGroup(g.getDatanodeInfo(), targetDn)) { @@ -962,9 +1039,22 @@ void reset(Configuration conf) { movedBlocks.cleanup(); } + /** set the sleeping period for block move completion check */ + @VisibleForTesting + public static void setBlockMoveWaitTime(long time) { + blockMoveWaitTime = time; + } + + @VisibleForTesting + public static void setDelayAfterErrors(long time) { + delayAfterErrors = time; + } + /** shutdown thread pools */ - void shutdownNow() { - dispatchExecutor.shutdownNow(); + public void shutdownNow() { + if (dispatchExecutor != null) { + dispatchExecutor.shutdownNow(); + } moveExecutor.shutdownNow(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Matcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Matcher.java index 54febc6fee77e..f8d00711b8756 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Matcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Matcher.java @@ -31,6 +31,11 @@ public interface Matcher { public boolean match(NetworkTopology cluster, Node left, Node right) { return cluster.isOnSameNodeGroup(left, right); } + + @Override + public String toString() { + return "SAME_NODE_GROUP"; + } }; /** Match datanodes in the same rack. */ @@ -39,6 +44,11 @@ public boolean match(NetworkTopology cluster, Node left, Node right) { public boolean match(NetworkTopology cluster, Node left, Node right) { return cluster.isOnSameRack(left, right); } + + @Override + public String toString() { + return "SAME_RACK"; + } }; /** Match any datanode with any other datanode. */ @@ -47,5 +57,10 @@ public boolean match(NetworkTopology cluster, Node left, Node right) { public boolean match(NetworkTopology cluster, Node left, Node right) { return left != right; } + + @Override + public String toString() { + return "ANY_OTHER"; + } }; } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/MovedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/MovedBlocks.java index 557bfd36ab0ad..18b9cd8ecf5d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/MovedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/MovedBlocks.java @@ -40,7 +40,7 @@ public class MovedBlocks { public static class Locations { private final Block block; // the block /** The locations of the replicas of the block. 
*/ - private final List locations = new ArrayList(3); + protected final List locations = new ArrayList(3); public Locations(Block block) { this.block = block; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 820a4edd579b8..91625314d7926 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -18,19 +18,26 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.Closeable; -import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -43,6 +50,8 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; +import com.google.common.annotations.VisibleForTesting; + /** * The class provides utilities for accessing a NameNode. 
*/ @@ -51,6 +60,41 @@ public class NameNodeConnector implements Closeable { private static final Log LOG = LogFactory.getLog(NameNodeConnector.class); private static final int MAX_NOT_CHANGED_ITERATIONS = 5; + private static boolean write2IdFile = true; + + /** Create {@link NameNodeConnector} for the given namenodes. */ + public static List newNameNodeConnectors( + Collection namenodes, String name, Path idPath, Configuration conf) + throws IOException { + final List connectors = new ArrayList( + namenodes.size()); + for (URI uri : namenodes) { + NameNodeConnector nnc = new NameNodeConnector(name, uri, idPath, + null, conf); + nnc.getKeyManager().startBlockKeyUpdater(); + connectors.add(nnc); + } + return connectors; + } + + public static List newNameNodeConnectors( + Map> namenodes, String name, Path idPath, + Configuration conf) throws IOException { + final List connectors = new ArrayList( + namenodes.size()); + for (Map.Entry> entry : namenodes.entrySet()) { + NameNodeConnector nnc = new NameNodeConnector(name, entry.getKey(), + idPath, entry.getValue(), conf); + nnc.getKeyManager().startBlockKeyUpdater(); + connectors.add(nnc); + } + return connectors; + } + + @VisibleForTesting + public static void setWrite2IdFile(boolean write2IdFile) { + NameNodeConnector.write2IdFile = write2IdFile; + } private final URI nameNodeUri; private final String blockpoolID; @@ -58,23 +102,28 @@ public class NameNodeConnector implements Closeable { private final NamenodeProtocol namenode; private final ClientProtocol client; private final KeyManager keyManager; + final AtomicBoolean fallbackToSimpleAuth = new AtomicBoolean(false); - private final FileSystem fs; + private final DistributedFileSystem fs; private final Path idPath; private final OutputStream out; + private final List targetPaths; private int notChangedIterations = 0; public NameNodeConnector(String name, URI nameNodeUri, Path idPath, - Configuration conf) throws IOException { + List targetPaths, Configuration conf) + 
throws IOException { this.nameNodeUri = nameNodeUri; this.idPath = idPath; - + this.targetPaths = targetPaths == null || targetPaths.isEmpty() ? Arrays + .asList(new Path("/")) : targetPaths; + this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri, NamenodeProtocol.class).getProxy(); this.client = NameNodeProxies.createProxy(conf, nameNodeUri, - ClientProtocol.class).getProxy(); - this.fs = FileSystem.get(nameNodeUri, conf); + ClientProtocol.class, fallbackToSimpleAuth).getProxy(); + this.fs = (DistributedFileSystem)FileSystem.get(nameNodeUri, conf); final NamespaceInfo namespaceinfo = namenode.versionRequest(); this.blockpoolID = namespaceinfo.getBlockPoolID(); @@ -82,13 +131,18 @@ public NameNodeConnector(String name, URI nameNodeUri, Path idPath, final FsServerDefaults defaults = fs.getServerDefaults(new Path("/")); this.keyManager = new KeyManager(blockpoolID, namenode, defaults.getEncryptDataTransfer(), conf); - // Exit if there is another one running. - out = checkAndMarkRunning(); + // if it is for test, we do not create the id file + out = checkAndMarkRunning(); if (out == null) { + // Exit if there is another one running. throw new IOException("Another " + name + " is running."); } } + public DistributedFileSystem getDistributedFileSystem() { + return fs; + } + /** @return the block pool ID */ public String getBlockpoolID() { return blockpoolID; @@ -111,6 +165,11 @@ public KeyManager getKeyManager() { return keyManager; } + /** @return the list of paths to scan/migrate */ + public List getTargetPaths() { + return targetPaths; + } + /** Should the instance continue running? 
*/ public boolean shouldContinue(long dispatchBlockMoveBytes) { if (dispatchBlockMoveBytes > 0) { @@ -144,9 +203,11 @@ public boolean shouldContinue(long dispatchBlockMoveBytes) { */ private OutputStream checkAndMarkRunning() throws IOException { try { - final DataOutputStream out = fs.create(idPath); - out.writeBytes(InetAddress.getLocalHost().getHostName()); - out.flush(); + final FSDataOutputStream out = fs.create(idPath); + if (write2IdFile) { + out.writeBytes(InetAddress.getLocalHost().getHostName()); + out.hflush(); + } return out; } catch(RemoteException e) { if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java index c1e0682dd5b99..9ef227400c41f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockCollection.java @@ -60,6 +60,11 @@ public interface BlockCollection { */ public short getBlockReplication(); + /** + * @return the storage policy ID. + */ + public byte getStoragePolicyID(); + /** * Get the name of the collection. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 8470680a98c57..cb303a792cfe5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -42,6 +42,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; @@ -254,6 +255,7 @@ public int getPendingDataNodeMessageCount() { /** for block replicas placement */ private BlockPlacementPolicy blockplacement; + private final BlockStoragePolicy.Suite storagePolicySuite; /** Check whether name system is running before terminating */ private boolean checkNSRunning = true; @@ -276,6 +278,7 @@ public BlockManager(final Namesystem namesystem, final FSClusterStats stats, blockplacement = BlockPlacementPolicy.getInstance( conf, stats, datanodeManager.getNetworkTopology(), datanodeManager.getHost2DatanodeMap()); + storagePolicySuite = BlockStoragePolicy.readBlockStorageSuite(conf); pendingReplications = new PendingReplicationBlocks(conf.getInt( DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_DEFAULT) * 1000L); @@ -394,7 +397,11 @@ private static BlockTokenSecretManager createBlockTokenSecretManager( lifetimeMin*60*1000L, 0, null, encryptionAlgorithm); } } - + + public BlockStoragePolicy getStoragePolicy(final String policyName) { + return storagePolicySuite.getPolicy(policyName); + } + public void 
setBlockPoolId(String blockPoolId) { if (isBlockTokenEnabled()) { blockTokenSecretManager.setBlockPoolId(blockPoolId); @@ -445,7 +452,7 @@ public DatanodeManager getDatanodeManager() { return datanodeManager; } - /** @return the BlockPlacementPolicy */ + @VisibleForTesting public BlockPlacementPolicy getBlockPlacementPolicy() { return blockplacement; } @@ -1366,7 +1373,7 @@ int computeReplicationWorkForBlocks(List> blocksToReplicate) { // choose replication targets: NOT HOLDING THE GLOBAL LOCK // It is costly to extract the filename for which chooseTargets is called, // so for now we pass in the block collection itself. - rw.chooseTargets(blockplacement, excludedNodes); + rw.chooseTargets(blockplacement, storagePolicySuite, excludedNodes); } namesystem.writeLock(); @@ -1467,24 +1474,48 @@ int computeReplicationWorkForBlocks(List> blocksToReplicate) { return scheduledWork; } + /** Choose target for WebHDFS redirection. */ + public DatanodeStorageInfo[] chooseTarget4WebHDFS(String src, + DatanodeDescriptor clientnode, Set excludes, long blocksize) { + return blockplacement.chooseTarget(src, 1, clientnode, + Collections.emptyList(), false, excludes, + blocksize, storagePolicySuite.getDefaultPolicy()); + } + + /** Choose target for getting additional datanodes for an existing pipeline. */ + public DatanodeStorageInfo[] chooseTarget4AdditionalDatanode(String src, + int numAdditionalNodes, + DatanodeDescriptor clientnode, + List chosen, + Set excludes, + long blocksize, + byte storagePolicyID) { + + final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(storagePolicyID); + return blockplacement.chooseTarget(src, numAdditionalNodes, clientnode, + chosen, true, excludes, blocksize, storagePolicy); + } + /** - * Choose target datanodes according to the replication policy. + * Choose target datanodes for creating a new block. * * @throws IOException * if the number of targets < minimum replication. 
* @see BlockPlacementPolicy#chooseTarget(String, int, Node, - * List, boolean, Set, long, StorageType) + * Set, long, List, BlockStoragePolicy) */ - public DatanodeStorageInfo[] chooseTarget(final String src, + public DatanodeStorageInfo[] chooseTarget4NewBlock(final String src, final int numOfReplicas, final DatanodeDescriptor client, final Set excludedNodes, - final long blocksize, List favoredNodes) throws IOException { + final long blocksize, + final List favoredNodes, + final byte storagePolicyID) throws IOException { List favoredDatanodeDescriptors = getDatanodeDescriptors(favoredNodes); + final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(storagePolicyID); final DatanodeStorageInfo[] targets = blockplacement.chooseTarget(src, numOfReplicas, client, excludedNodes, blocksize, - // TODO: get storage type from file - favoredDatanodeDescriptors, StorageType.DEFAULT); + favoredDatanodeDescriptors, storagePolicy); if (targets.length < minReplication) { throw new IOException("File " + src + " could only be replicated to " + targets.length + " nodes instead of minReplication (=" @@ -2716,6 +2747,10 @@ private void chooseExcessReplicates(final Collection nonExc assert namesystem.hasWriteLock(); // first form a rack to datanodes map and BlockCollection bc = getBlockCollection(b); + final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(bc.getStoragePolicyID()); + final List excessTypes = storagePolicy.chooseExcess( + replication, DatanodeStorageInfo.toStorageTypes(nonExcess)); + final Map> rackMap = new HashMap>(); @@ -2736,16 +2771,13 @@ private void chooseExcessReplicates(final Collection nonExc final DatanodeStorageInfo addedNodeStorage = DatanodeStorageInfo.getDatanodeStorageInfo(nonExcess, addedNode); while (nonExcess.size() - replication > 0) { - // check if we can delete delNodeHint final DatanodeStorageInfo cur; - if (firstOne && delNodeHintStorage != null - && (moreThanOne.contains(delNodeHintStorage) - || (addedNodeStorage != 
null - && !moreThanOne.contains(addedNodeStorage)))) { + if (useDelHint(firstOne, delNodeHintStorage, addedNodeStorage, + moreThanOne, excessTypes)) { cur = delNodeHintStorage; } else { // regular excessive replica removal cur = replicator.chooseReplicaToDelete(bc, b, replication, - moreThanOne, exactlyOne); + moreThanOne, exactlyOne, excessTypes); } firstOne = false; @@ -2771,6 +2803,27 @@ private void chooseExcessReplicates(final Collection nonExc } } + /** Check if we can use delHint */ + static boolean useDelHint(boolean isFirst, DatanodeStorageInfo delHint, + DatanodeStorageInfo added, List moreThan1Racks, + List excessTypes) { + if (!isFirst) { + return false; // only consider delHint for the first case + } else if (delHint == null) { + return false; // no delHint + } else if (!excessTypes.contains(delHint.getStorageType())) { + return false; // delHint storage type is not an excess type + } else { + // check if removing delHint reduces the number of racks + if (moreThan1Racks.contains(delHint)) { + return true; // delHint and some other nodes are under the same rack + } else if (added != null && !moreThan1Racks.contains(added)) { + return true; // the added node adds a new rack + } + return false; // removing delHint reduces the number of racks; + } + } + private void addToExcessReplicate(DatanodeInfo dn, Block block) { assert namesystem.hasWriteLock(); LightWeightLinkedSet excessBlocks = excessReplicateMap.get(dn.getDatanodeUuid()); @@ -2877,7 +2930,7 @@ void addBlock(DatanodeStorageInfo storageInfo, Block block, String delHint) // Decrement number of blocks scheduled to this datanode. // for a retry request (of DatanodeProtocol#blockReceivedAndDeleted with // RECEIVED_BLOCK), we currently also decrease the approximate number. 
- node.decrementBlocksScheduled(); + node.decrementBlocksScheduled(storageInfo.getStorageType()); // get the deletion hint node DatanodeDescriptor delHintNode = null; @@ -3546,10 +3599,12 @@ public ReplicationWork(Block block, } private void chooseTargets(BlockPlacementPolicy blockplacement, + BlockStoragePolicy.Suite storagePolicySuite, Set excludedNodes) { targets = blockplacement.chooseTarget(bc.getName(), additionalReplRequired, srcNode, liveReplicaStorages, false, - excludedNodes, block.getNumBytes(), StorageType.DEFAULT); + excludedNodes, block.getNumBytes(), + storagePolicySuite.getPolicy(bc.getStoragePolicyID())); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java index 2e9e2e44efd6c..00f72532751e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java @@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; @@ -75,7 +76,7 @@ public abstract DatanodeStorageInfo[] chooseTarget(String srcPath, boolean returnChosenNodes, Set excludedNodes, long blocksize, - StorageType storageType); + BlockStoragePolicy storagePolicy); /** * Same as {@link #chooseTarget(String, int, Node, Set, long, List, StorageType)} @@ -89,14 +90,14 @@ DatanodeStorageInfo[] chooseTarget(String src, Set excludedNodes, long blocksize, List favoredNodes, - StorageType storageType) { + BlockStoragePolicy 
storagePolicy) { // This class does not provide the functionality of placing // a block in favored datanodes. The implementations of this class // are expected to provide this functionality return chooseTarget(src, numOfReplicas, writer, new ArrayList(numOfReplicas), false, - excludedNodes, blocksize, storageType); + excludedNodes, blocksize, storagePolicy); } /** @@ -118,18 +119,21 @@ abstract public BlockPlacementStatus verifyBlockPlacement(String srcPath, * @param srcBC block collection of file to which block-to-be-deleted belongs * @param block The block to be deleted * @param replicationFactor The required number of replicas for this block - * @param existingReplicas The replica locations of this block that are present - on at least two unique racks. - * @param moreExistingReplicas Replica locations of this block that are not - listed in the previous parameter. + * @param moreThanOne The replica locations of this block that are present + * on more than one unique racks. + * @param exactlyOne Replica locations of this block that are present + * on exactly one unique racks. + * @param excessTypes The excess {@link StorageType}s according to the + * {@link BlockStoragePolicy}. * @return the replica that is the best candidate for deletion */ abstract public DatanodeStorageInfo chooseReplicaToDelete( BlockCollection srcBC, Block block, short replicationFactor, - Collection existingReplicas, - Collection moreExistingReplicas); + Collection moreThanOne, + Collection exactlyOne, + List excessTypes); /** * Used to setup a BlockPlacementPolicy object. 
This should be defined by diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index f77d4ab563505..a0e67013649e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -19,15 +19,11 @@ import static org.apache.hadoop.util.Time.now; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.StorageType; @@ -80,12 +76,6 @@ protected StringBuilder initialValue() { */ protected int tolerateHeartbeatMultiplier; - protected BlockPlacementPolicyDefault(Configuration conf, FSClusterStats stats, - NetworkTopology clusterMap, - Host2NodesMap host2datanodeMap) { - initialize(conf, stats, clusterMap, host2datanodeMap); - } - protected BlockPlacementPolicyDefault() { } @@ -117,9 +107,9 @@ public DatanodeStorageInfo[] chooseTarget(String srcPath, boolean returnChosenNodes, Set excludedNodes, long blocksize, - StorageType storageType) { + final BlockStoragePolicy storagePolicy) { return chooseTarget(numOfReplicas, writer, chosenNodes, returnChosenNodes, - excludedNodes, blocksize, storageType); + excludedNodes, blocksize, storagePolicy); } @Override @@ -129,17 +119,21 @@ DatanodeStorageInfo[] chooseTarget(String src, Set excludedNodes, long blocksize, List favoredNodes, - 
StorageType storageType) { + BlockStoragePolicy storagePolicy) { try { if (favoredNodes == null || favoredNodes.size() == 0) { // Favored nodes not specified, fall back to regular block placement. return chooseTarget(src, numOfReplicas, writer, new ArrayList(numOfReplicas), false, - excludedNodes, blocksize, storageType); + excludedNodes, blocksize, storagePolicy); } Set favoriteAndExcludedNodes = excludedNodes == null ? new HashSet() : new HashSet(excludedNodes); + final List requiredStorageTypes = storagePolicy + .chooseStorageTypes((short)numOfReplicas); + final EnumMap storageTypes = + getRequiredStorageTypes(requiredStorageTypes); // Choose favored nodes List results = new ArrayList(); @@ -152,7 +146,7 @@ DatanodeStorageInfo[] chooseTarget(String src, final DatanodeStorageInfo target = chooseLocalStorage(favoredNode, favoriteAndExcludedNodes, blocksize, getMaxNodesPerRack(results.size(), numOfReplicas)[1], - results, avoidStaleNodes, storageType, false); + results, avoidStaleNodes, storageTypes, false); if (target == null) { LOG.warn("Could not find a target for file " + src + " with favored node " + favoredNode); @@ -166,7 +160,7 @@ DatanodeStorageInfo[] chooseTarget(String src, numOfReplicas -= results.size(); DatanodeStorageInfo[] remainingTargets = chooseTarget(src, numOfReplicas, writer, results, - false, favoriteAndExcludedNodes, blocksize, storageType); + false, favoriteAndExcludedNodes, blocksize, storagePolicy); for (int i = 0; i < remainingTargets.length; i++) { results.add(remainingTargets[i]); } @@ -174,10 +168,14 @@ DatanodeStorageInfo[] chooseTarget(String src, return getPipeline(writer, results.toArray(new DatanodeStorageInfo[results.size()])); } catch (NotEnoughReplicasException nr) { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to choose with favored nodes (=" + favoredNodes + + "), disregard favored nodes hint and retry.", nr); + } // Fall back to regular block placement disregarding favored nodes hint return chooseTarget(src, 
numOfReplicas, writer, new ArrayList(numOfReplicas), false, - excludedNodes, blocksize, storageType); + excludedNodes, blocksize, storagePolicy); } } @@ -188,7 +186,7 @@ private DatanodeStorageInfo[] chooseTarget(int numOfReplicas, boolean returnChosenNodes, Set excludedNodes, long blocksize, - StorageType storageType) { + final BlockStoragePolicy storagePolicy) { if (numOfReplicas == 0 || clusterMap.getNumOfLeaves()==0) { return DatanodeStorageInfo.EMPTY_ARRAY; } @@ -213,8 +211,9 @@ private DatanodeStorageInfo[] chooseTarget(int numOfReplicas, boolean avoidStaleNodes = (stats != null && stats.isAvoidingStaleDataNodesForWrite()); - Node localNode = chooseTarget(numOfReplicas, writer, - excludedNodes, blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); + final Node localNode = chooseTarget(numOfReplicas, writer, excludedNodes, + blocksize, maxNodesPerRack, results, avoidStaleNodes, storagePolicy, + EnumSet.noneOf(StorageType.class), results.isEmpty()); if (!returnChosenNodes) { results.removeAll(chosenStorage); } @@ -234,7 +233,22 @@ private int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) { int maxNodesPerRack = (totalNumOfReplicas-1)/clusterMap.getNumOfRacks()+2; return new int[] {numOfReplicas, maxNodesPerRack}; } - + + private EnumMap getRequiredStorageTypes( + List types) { + EnumMap map = new EnumMap(StorageType.class); + for (StorageType type : types) { + if (!map.containsKey(type)) { + map.put(type, 1); + } else { + int num = map.get(type); + map.put(type, num + 1); + } + } + return map; + } + /** * choose numOfReplicas from all data nodes * @param numOfReplicas additional number of replicas wanted @@ -247,31 +261,49 @@ private int[] getMaxNodesPerRack(int numOfChosen, int numOfReplicas) { * @return local node of writer (not chosen node) */ private Node chooseTarget(int numOfReplicas, - Node writer, - Set excludedNodes, - long blocksize, - int maxNodesPerRack, - List results, - final boolean avoidStaleNodes, - StorageType 
storageType) { + Node writer, + final Set excludedNodes, + final long blocksize, + final int maxNodesPerRack, + final List results, + final boolean avoidStaleNodes, + final BlockStoragePolicy storagePolicy, + final EnumSet unavailableStorages, + final boolean newBlock) { if (numOfReplicas == 0 || clusterMap.getNumOfLeaves()==0) { return writer; } - int totalReplicasExpected = numOfReplicas + results.size(); - - int numOfResults = results.size(); - boolean newBlock = (numOfResults==0); + final int numOfResults = results.size(); + final int totalReplicasExpected = numOfReplicas + numOfResults; if ((writer == null || !(writer instanceof DatanodeDescriptor)) && !newBlock) { writer = results.get(0).getDatanodeDescriptor(); } // Keep a copy of original excludedNodes - final Set oldExcludedNodes = avoidStaleNodes ? - new HashSet(excludedNodes) : null; + final Set oldExcludedNodes = new HashSet(excludedNodes); + + // choose storage types; use fallbacks for unavailable storages + final List requiredStorageTypes = storagePolicy + .chooseStorageTypes((short) totalReplicasExpected, + DatanodeStorageInfo.toStorageTypes(results), + unavailableStorages, newBlock); + final EnumMap storageTypes = + getRequiredStorageTypes(requiredStorageTypes); + if (LOG.isTraceEnabled()) { + LOG.trace("storageTypes=" + storageTypes); + } + try { + if ((numOfReplicas = requiredStorageTypes.size()) == 0) { + throw new NotEnoughReplicasException( + "All required storage types are unavailable: " + + " unavailableStorages=" + unavailableStorages + + ", storagePolicy=" + storagePolicy); + } + if (numOfResults == 0) { writer = chooseLocalStorage(writer, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType, true) + maxNodesPerRack, results, avoidStaleNodes, storageTypes, true) .getDatanodeDescriptor(); if (--numOfReplicas == 0) { return writer; @@ -280,7 +312,7 @@ private Node chooseTarget(int numOfReplicas, final DatanodeDescriptor dn0 = 
results.get(0).getDatanodeDescriptor(); if (numOfResults <= 1) { chooseRemoteRack(1, dn0, excludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); if (--numOfReplicas == 0) { return writer; } @@ -289,24 +321,28 @@ private Node chooseTarget(int numOfReplicas, final DatanodeDescriptor dn1 = results.get(1).getDatanodeDescriptor(); if (clusterMap.isOnSameRack(dn0, dn1)) { chooseRemoteRack(1, dn0, excludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); } else if (newBlock){ chooseLocalRack(dn1, excludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); } else { chooseLocalRack(writer, excludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); } if (--numOfReplicas == 0) { return writer; } } chooseRandom(numOfReplicas, NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } catch (NotEnoughReplicasException e) { final String message = "Failed to place enough replicas, still in need of " + (totalReplicasExpected - results.size()) + " to reach " - + totalReplicasExpected + "."; + + totalReplicasExpected + + " (unavailableStorages=" + unavailableStorages + + ", storagePolicy=" + storagePolicy + + ", newBlock=" + newBlock + ")"; + if (LOG.isTraceEnabled()) { LOG.trace(message, e); } else { @@ -327,7 +363,28 @@ private Node chooseTarget(int numOfReplicas, // if the NotEnoughReplicasException was thrown in chooseRandom(). 
numOfReplicas = totalReplicasExpected - results.size(); return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize, - maxNodesPerRack, results, false, storageType); + maxNodesPerRack, results, false, storagePolicy, unavailableStorages, + newBlock); + } + + boolean retry = false; + // simply add all the remaining types into unavailableStorages and give + // another try. No best effort is guaranteed here. + for (StorageType type : storageTypes.keySet()) { + if (!unavailableStorages.contains(type)) { + unavailableStorages.add(type); + retry = true; + } + } + if (retry) { + for (DatanodeStorageInfo resultStorage : results) { + addToExcludedNodes(resultStorage.getDatanodeDescriptor(), + oldExcludedNodes); + } + numOfReplicas = totalReplicasExpected - results.size(); + return chooseTarget(numOfReplicas, writer, oldExcludedNodes, blocksize, + maxNodesPerRack, results, false, storagePolicy, unavailableStorages, + newBlock); } } return writer; @@ -340,28 +397,35 @@ private Node chooseTarget(int numOfReplicas, * @return the chosen storage */ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, - Set excludedNodes, - long blocksize, - int maxNodesPerRack, - List results, - boolean avoidStaleNodes, - StorageType storageType, - boolean fallbackToLocalRack) + Set excludedNodes, long blocksize, int maxNodesPerRack, + List results, boolean avoidStaleNodes, + EnumMap storageTypes, boolean fallbackToLocalRack) throws NotEnoughReplicasException { // if no local machine, randomly choose one node if (localMachine == null) { return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } if (preferLocalNode && localMachine instanceof DatanodeDescriptor) { DatanodeDescriptor localDatanode = (DatanodeDescriptor) localMachine; // otherwise try local machine first if (excludedNodes.add(localMachine)) { // was not in the excluded list - 
for(DatanodeStorageInfo localStorage : DFSUtil.shuffle( - localDatanode.getStorageInfos())) { - if (addIfIsGoodTarget(localStorage, excludedNodes, blocksize, - maxNodesPerRack, false, results, avoidStaleNodes, storageType) >= 0) { - return localStorage; + for (Iterator> iter = storageTypes + .entrySet().iterator(); iter.hasNext(); ) { + Map.Entry entry = iter.next(); + for (DatanodeStorageInfo localStorage : DFSUtil.shuffle( + localDatanode.getStorageInfos())) { + StorageType type = entry.getKey(); + if (addIfIsGoodTarget(localStorage, excludedNodes, blocksize, + maxNodesPerRack, false, results, avoidStaleNodes, type) >= 0) { + int num = entry.getValue(); + if (num == 1) { + iter.remove(); + } else { + entry.setValue(num - 1); + } + return localStorage; + } } } } @@ -372,7 +436,7 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, } // try a node on local rack return chooseLocalRack(localMachine, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } /** @@ -395,50 +459,71 @@ protected int addToExcludedNodes(DatanodeDescriptor localMachine, * @return the chosen node */ protected DatanodeStorageInfo chooseLocalRack(Node localMachine, - Set excludedNodes, - long blocksize, - int maxNodesPerRack, - List results, - boolean avoidStaleNodes, - StorageType storageType) + Set excludedNodes, + long blocksize, + int maxNodesPerRack, + List results, + boolean avoidStaleNodes, + EnumMap storageTypes) throws NotEnoughReplicasException { // no local machine, so choose a random machine if (localMachine == null) { return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } + final String localRack = localMachine.getNetworkLocation(); - // choose one from the local rack try { - return chooseRandom(localMachine.getNetworkLocation(), excludedNodes, - 
blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); - } catch (NotEnoughReplicasException e1) { - // find the second replica - DatanodeDescriptor newLocal=null; + // choose one from the local rack + return chooseRandom(localRack, excludedNodes, + blocksize, maxNodesPerRack, results, avoidStaleNodes, storageTypes); + } catch (NotEnoughReplicasException e) { + // find the next replica and retry with its rack for(DatanodeStorageInfo resultStorage : results) { DatanodeDescriptor nextNode = resultStorage.getDatanodeDescriptor(); if (nextNode != localMachine) { - newLocal = nextNode; - break; + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to choose from local rack (location = " + localRack + + "), retry with the rack of the next replica (location = " + + nextNode.getNetworkLocation() + ")", e); + } + return chooseFromNextRack(nextNode, excludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } - if (newLocal != null) { - try { - return chooseRandom(newLocal.getNetworkLocation(), excludedNodes, - blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); - } catch(NotEnoughReplicasException e2) { - //otherwise randomly choose one from the network - return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); - } - } else { - //otherwise randomly choose one from the network - return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to choose from local rack (location = " + localRack + + "); the second replica is not found, retry choosing ramdomly", e); } + //the second replica is not found, randomly choose one from the network + return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } - + + private DatanodeStorageInfo chooseFromNextRack(Node next, + Set excludedNodes, + long 
blocksize, + int maxNodesPerRack, + List results, + boolean avoidStaleNodes, + EnumMap storageTypes) throws NotEnoughReplicasException { + final String nextRack = next.getNetworkLocation(); + try { + return chooseRandom(nextRack, excludedNodes, blocksize, maxNodesPerRack, + results, avoidStaleNodes, storageTypes); + } catch(NotEnoughReplicasException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to choose from the next rack (location = " + nextRack + + "), retry choosing ramdomly", e); + } + //otherwise randomly choose one from the network + return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, + maxNodesPerRack, results, avoidStaleNodes, storageTypes); + } + } + /** * Choose numOfReplicas nodes from the racks * that localMachine is NOT on. @@ -453,18 +538,22 @@ protected void chooseRemoteRack(int numOfReplicas, int maxReplicasPerRack, List results, boolean avoidStaleNodes, - StorageType storageType) + EnumMap storageTypes) throws NotEnoughReplicasException { int oldNumOfReplicas = results.size(); // randomly choose one node from remote racks try { chooseRandom(numOfReplicas, "~" + localMachine.getNetworkLocation(), excludedNodes, blocksize, maxReplicasPerRack, results, - avoidStaleNodes, storageType); + avoidStaleNodes, storageTypes); } catch (NotEnoughReplicasException e) { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to choose remote rack (location = ~" + + localMachine.getNetworkLocation() + "), fallback to local rack", e); + } chooseRandom(numOfReplicas-(results.size()-oldNumOfReplicas), localMachine.getNetworkLocation(), excludedNodes, blocksize, - maxReplicasPerRack, results, avoidStaleNodes, storageType); + maxReplicasPerRack, results, avoidStaleNodes, storageTypes); } } @@ -478,10 +567,10 @@ protected DatanodeStorageInfo chooseRandom(String scope, int maxNodesPerRack, List results, boolean avoidStaleNodes, - StorageType storageType) + EnumMap storageTypes) throws NotEnoughReplicasException { return chooseRandom(1, scope, excludedNodes, 
blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); } /** @@ -495,8 +584,8 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas, int maxNodesPerRack, List results, boolean avoidStaleNodes, - StorageType storageType) - throws NotEnoughReplicasException { + EnumMap storageTypes) + throws NotEnoughReplicasException { int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes( scope, excludedNodes); @@ -512,24 +601,43 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas, DatanodeDescriptor chosenNode = (DatanodeDescriptor)clusterMap.chooseRandom(scope); if (excludedNodes.add(chosenNode)) { //was not in the excluded list + if (LOG.isDebugEnabled()) { + builder.append("\nNode ").append(NodeBase.getPath(chosenNode)).append(" ["); + } numOfAvailableNodes--; final DatanodeStorageInfo[] storages = DFSUtil.shuffle( chosenNode.getStorageInfos()); - int i; - for(i = 0; i < storages.length; i++) { - final int newExcludedNodes = addIfIsGoodTarget(storages[i], - excludedNodes, blocksize, maxNodesPerRack, considerLoad, results, - avoidStaleNodes, storageType); - if (newExcludedNodes >= 0) { - numOfReplicas--; - if (firstChosen == null) { - firstChosen = storages[i]; + int i = 0; + boolean search = true; + for (Iterator> iter = storageTypes + .entrySet().iterator(); search && iter.hasNext(); ) { + Map.Entry entry = iter.next(); + for (i = 0; i < storages.length; i++) { + StorageType type = entry.getKey(); + final int newExcludedNodes = addIfIsGoodTarget(storages[i], + excludedNodes, blocksize, maxNodesPerRack, considerLoad, results, + avoidStaleNodes, type); + if (newExcludedNodes >= 0) { + numOfReplicas--; + if (firstChosen == null) { + firstChosen = storages[i]; + } + numOfAvailableNodes -= newExcludedNodes; + int num = entry.getValue(); + if (num == 1) { + iter.remove(); + } else { + entry.setValue(num - 1); + } + search = false; + break; } - numOfAvailableNodes -= newExcludedNodes; - break; } } 
+ if (LOG.isDebugEnabled()) { + builder.append("\n]"); + } // If no candidate storage was found on this DN then set badTarget. badTarget = (i == storages.length); @@ -540,9 +648,11 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas, String detail = enableDebugLogging; if (LOG.isDebugEnabled()) { if (badTarget && builder != null) { - detail = builder.append("]").toString(); + detail = builder.toString(); builder.setLength(0); - } else detail = ""; + } else { + detail = ""; + } } throw new NotEnoughReplicasException(detail); } @@ -576,14 +686,10 @@ int addIfIsGoodTarget(DatanodeStorageInfo storage, private static void logNodeIsNotChosen(DatanodeStorageInfo storage, String reason) { if (LOG.isDebugEnabled()) { - final DatanodeDescriptor node = storage.getDatanodeDescriptor(); // build the error message for later use. debugLoggingBuilder.get() - .append(node).append(": ") - .append("Storage ").append(storage) - .append("at node ").append(NodeBase.getPath(node)) - .append(" is not chosen because ") - .append(reason); + .append("\n Storage ").append(storage) + .append(" is not chosen since ").append(reason).append("."); } } @@ -608,11 +714,10 @@ private boolean isGoodTarget(DatanodeStorageInfo storage, boolean considerLoad, List results, boolean avoidStaleNodes, - StorageType storageType) { - if (storage.getStorageType() != storageType) { - logNodeIsNotChosen(storage, - "storage types do not match, where the expected storage type is " - + storageType); + StorageType requiredStorageType) { + if (storage.getStorageType() != requiredStorageType) { + logNodeIsNotChosen(storage, "storage types do not match," + + " where the required storage type is " + requiredStorageType); return false; } if (storage.getState() == State.READ_ONLY_SHARED) { @@ -634,9 +739,14 @@ private boolean isGoodTarget(DatanodeStorageInfo storage, } final long requiredSize = blockSize * HdfsConstants.MIN_BLOCKS_FOR_WRITE; - final long scheduledSize = blockSize * node.getBlocksScheduled(); - 
if (requiredSize > storage.getRemaining() - scheduledSize) { - logNodeIsNotChosen(storage, "the node does not have enough space "); + final long scheduledSize = blockSize * node.getBlocksScheduled(storage.getStorageType()); + final long remaining = node.getRemaining(storage.getStorageType()); + if (requiredSize > remaining - scheduledSize) { + logNodeIsNotChosen(storage, "the node does not have enough " + + storage.getStorageType() + " space" + + " (required=" + requiredSize + + ", scheduled=" + scheduledSize + + ", remaining=" + remaining + ")"); return false; } @@ -645,8 +755,8 @@ private boolean isGoodTarget(DatanodeStorageInfo storage, final double maxLoad = 2.0 * stats.getInServiceXceiverAverage(); final int nodeLoad = node.getXceiverCount(); if (nodeLoad > maxLoad) { - logNodeIsNotChosen(storage, - "the node is too busy (load:"+nodeLoad+" > "+maxLoad+") "); + logNodeIsNotChosen(storage, "the node is too busy (load: " + nodeLoad + + " > " + maxLoad + ") "); return false; } } @@ -666,7 +776,7 @@ private boolean isGoodTarget(DatanodeStorageInfo storage, } return true; } - + /** * Return a pipeline of nodes. 
* The pipeline is formed finding a shortest path that @@ -732,7 +842,8 @@ public BlockPlacementStatus verifyBlockPlacement(String srcPath, public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc, Block block, short replicationFactor, Collection first, - Collection second) { + Collection second, + final List excessTypes) { long oldestHeartbeat = now() - heartbeatInterval * tolerateHeartbeatMultiplier; DatanodeStorageInfo oldestHeartbeatStorage = null; @@ -742,6 +853,10 @@ public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc, // Pick the node with the oldest heartbeat or with the least free space, // if all hearbeats are within the tolerable heartbeat interval for(DatanodeStorageInfo storage : pickupReplicaSet(first, second)) { + if (!excessTypes.contains(storage.getStorageType())) { + continue; + } + final DatanodeDescriptor node = storage.getDatanodeDescriptor(); long free = node.getRemaining(); long lastHeartbeat = node.getLastUpdate(); @@ -755,8 +870,16 @@ public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc, } } - return oldestHeartbeatStorage != null? 
oldestHeartbeatStorage - : minSpaceStorage; + final DatanodeStorageInfo storage; + if (oldestHeartbeatStorage != null) { + storage = oldestHeartbeatStorage; + } else if (minSpaceStorage != null) { + storage = minSpaceStorage; + } else { + return null; + } + excessTypes.remove(storage.getStorageType()); + return storage; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java index b3ff6b9b1f061..60e192b34a567 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java @@ -17,12 +17,7 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; @@ -70,22 +65,33 @@ public void initialize(Configuration conf, FSClusterStats stats, protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, Set excludedNodes, long blocksize, int maxNodesPerRack, List results, boolean avoidStaleNodes, - StorageType storageType, boolean fallbackToLocalRack - ) throws NotEnoughReplicasException { + EnumMap storageTypes, boolean fallbackToLocalRack) + throws NotEnoughReplicasException { // if no local machine, randomly choose one node if (localMachine == null) return chooseRandom(NodeBase.ROOT, excludedNodes, - blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); + blocksize, maxNodesPerRack, results, avoidStaleNodes, storageTypes); // otherwise try local machine first if 
(localMachine instanceof DatanodeDescriptor) { DatanodeDescriptor localDataNode = (DatanodeDescriptor)localMachine; if (excludedNodes.add(localMachine)) { // was not in the excluded list - for(DatanodeStorageInfo localStorage : DFSUtil.shuffle( - localDataNode.getStorageInfos())) { - if (addIfIsGoodTarget(localStorage, excludedNodes, blocksize, - maxNodesPerRack, false, results, avoidStaleNodes, storageType) >= 0) { - return localStorage; + for (Iterator> iter = storageTypes + .entrySet().iterator(); iter.hasNext(); ) { + Map.Entry entry = iter.next(); + for (DatanodeStorageInfo localStorage : DFSUtil.shuffle( + localDataNode.getStorageInfos())) { + StorageType type = entry.getKey(); + if (addIfIsGoodTarget(localStorage, excludedNodes, blocksize, + maxNodesPerRack, false, results, avoidStaleNodes, type) >= 0) { + int num = entry.getValue(); + if (num == 1) { + iter.remove(); + } else { + entry.setValue(num - 1); + } + return localStorage; + } } } } @@ -94,7 +100,7 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, // try a node on local node group DatanodeStorageInfo chosenStorage = chooseLocalNodeGroup( (NetworkTopologyWithNodeGroup)clusterMap, localMachine, excludedNodes, - blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); + blocksize, maxNodesPerRack, results, avoidStaleNodes, storageTypes); if (chosenStorage != null) { return chosenStorage; } @@ -104,7 +110,7 @@ protected DatanodeStorageInfo chooseLocalStorage(Node localMachine, } // try a node on local rack return chooseLocalRack(localMachine, excludedNodes, - blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); + blocksize, maxNodesPerRack, results, avoidStaleNodes, storageTypes); } /** @return the node of the second replica */ @@ -124,18 +130,19 @@ private static DatanodeDescriptor secondNode(Node localMachine, protected DatanodeStorageInfo chooseLocalRack(Node localMachine, Set excludedNodes, long blocksize, int maxNodesPerRack, List results, boolean 
avoidStaleNodes, - StorageType storageType) throws NotEnoughReplicasException { + EnumMap storageTypes) throws + NotEnoughReplicasException { // no local machine, so choose a random machine if (localMachine == null) { return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } // choose one from the local rack, but off-nodegroup try { final String scope = NetworkTopology.getFirstHalf(localMachine.getNetworkLocation()); return chooseRandom(scope, excludedNodes, blocksize, maxNodesPerRack, - results, avoidStaleNodes, storageType); + results, avoidStaleNodes, storageTypes); } catch (NotEnoughReplicasException e1) { // find the second replica final DatanodeDescriptor newLocal = secondNode(localMachine, results); @@ -143,16 +150,17 @@ protected DatanodeStorageInfo chooseLocalRack(Node localMachine, try { return chooseRandom( clusterMap.getRack(newLocal.getNetworkLocation()), excludedNodes, - blocksize, maxNodesPerRack, results, avoidStaleNodes, storageType); + blocksize, maxNodesPerRack, results, avoidStaleNodes, + storageTypes); } catch(NotEnoughReplicasException e2) { //otherwise randomly choose one from the network return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } else { //otherwise randomly choose one from the network return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } } @@ -161,8 +169,8 @@ protected DatanodeStorageInfo chooseLocalRack(Node localMachine, protected void chooseRemoteRack(int numOfReplicas, DatanodeDescriptor localMachine, Set excludedNodes, long blocksize, int maxReplicasPerRack, List results, - boolean avoidStaleNodes, StorageType storageType) - throws 
NotEnoughReplicasException { + boolean avoidStaleNodes, EnumMap storageTypes) + throws NotEnoughReplicasException { int oldNumOfReplicas = results.size(); final String rackLocation = NetworkTopology.getFirstHalf( @@ -170,12 +178,12 @@ protected void chooseRemoteRack(int numOfReplicas, try { // randomly choose from remote racks chooseRandom(numOfReplicas, "~" + rackLocation, excludedNodes, blocksize, - maxReplicasPerRack, results, avoidStaleNodes, storageType); + maxReplicasPerRack, results, avoidStaleNodes, storageTypes); } catch (NotEnoughReplicasException e) { // fall back to the local rack chooseRandom(numOfReplicas - (results.size() - oldNumOfReplicas), rackLocation, excludedNodes, blocksize, - maxReplicasPerRack, results, avoidStaleNodes, storageType); + maxReplicasPerRack, results, avoidStaleNodes, storageTypes); } } @@ -189,11 +197,12 @@ private DatanodeStorageInfo chooseLocalNodeGroup( NetworkTopologyWithNodeGroup clusterMap, Node localMachine, Set excludedNodes, long blocksize, int maxNodesPerRack, List results, boolean avoidStaleNodes, - StorageType storageType) throws NotEnoughReplicasException { + EnumMap storageTypes) throws + NotEnoughReplicasException { // no local machine, so choose a random machine if (localMachine == null) { return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } // choose one from the local node group @@ -201,7 +210,7 @@ private DatanodeStorageInfo chooseLocalNodeGroup( return chooseRandom( clusterMap.getNodeGroup(localMachine.getNetworkLocation()), excludedNodes, blocksize, maxNodesPerRack, results, avoidStaleNodes, - storageType); + storageTypes); } catch (NotEnoughReplicasException e1) { final DatanodeDescriptor newLocal = secondNode(localMachine, results); if (newLocal != null) { @@ -209,16 +218,16 @@ private DatanodeStorageInfo chooseLocalNodeGroup( return chooseRandom( 
clusterMap.getNodeGroup(newLocal.getNetworkLocation()), excludedNodes, blocksize, maxNodesPerRack, results, - avoidStaleNodes, storageType); + avoidStaleNodes, storageTypes); } catch(NotEnoughReplicasException e2) { //otherwise randomly choose one from the network return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } else { //otherwise randomly choose one from the network return chooseRandom(NodeBase.ROOT, excludedNodes, blocksize, - maxNodesPerRack, results, avoidStaleNodes, storageType); + maxNodesPerRack, results, avoidStaleNodes, storageTypes); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 34be727ff02d0..55599f7d3ae52 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -28,16 +28,19 @@ import java.util.Queue; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.server.namenode.CachedBlock; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.hdfs.util.EnumCounters; import 
org.apache.hadoop.hdfs.util.LightWeightHashSet; import org.apache.hadoop.util.IntrusiveCollection; import org.apache.hadoop.util.Time; @@ -202,8 +205,10 @@ public CachedBlocksList getPendingUncached() { * in case of errors (e.g. datanode does not report if an error occurs * while writing the block). */ - private int currApproxBlocksScheduled = 0; - private int prevApproxBlocksScheduled = 0; + private EnumCounters currApproxBlocksScheduled + = new EnumCounters(StorageType.class); + private EnumCounters prevApproxBlocksScheduled + = new EnumCounters(StorageType.class); private long lastBlocksScheduledRollTime = 0; private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min private int volumeFailures = 0; @@ -474,25 +479,48 @@ public Block[] getInvalidateBlocks(int maxblocks) { } } + /** + * @return Approximate number of blocks currently scheduled to be written + */ + public long getRemaining(StorageType t) { + long remaining = 0; + for(DatanodeStorageInfo s : getStorageInfos()) { + if (s.getStorageType() == t) { + remaining += s.getRemaining(); + } + } + return remaining; + } + + /** + * @return Approximate number of blocks currently scheduled to be written + * to the given storage type of this datanode. + */ + public int getBlocksScheduled(StorageType t) { + return (int)(currApproxBlocksScheduled.get(t) + + prevApproxBlocksScheduled.get(t)); + } + /** * @return Approximate number of blocks currently scheduled to be written * to this datanode. */ public int getBlocksScheduled() { - return currApproxBlocksScheduled + prevApproxBlocksScheduled; + return (int)(currApproxBlocksScheduled.sum() + + prevApproxBlocksScheduled.sum()); } /** Increment the number of blocks scheduled. */ - void incrementBlocksScheduled() { - currApproxBlocksScheduled++; + void incrementBlocksScheduled(StorageType t) { + currApproxBlocksScheduled.add(t, 1);; } /** Decrement the number of blocks scheduled. 
*/ - void decrementBlocksScheduled() { - if (prevApproxBlocksScheduled > 0) { - prevApproxBlocksScheduled--; - } else if (currApproxBlocksScheduled > 0) { - currApproxBlocksScheduled--; + void decrementBlocksScheduled(StorageType t) { + if (prevApproxBlocksScheduled.get(t) > 0) { + prevApproxBlocksScheduled.subtract(t, 1); + } else if (currApproxBlocksScheduled.get(t) > 0) { + currApproxBlocksScheduled.subtract(t, 1); } // its ok if both counters are zero. } @@ -500,8 +528,8 @@ void decrementBlocksScheduled() { /** Adjusts curr and prev number of blocks scheduled every few minutes. */ private void rollBlocksScheduled(long now) { if (now - lastBlocksScheduledRollTime > BLOCKS_SCHEDULED_ROLL_INTERVAL) { - prevApproxBlocksScheduled = currApproxBlocksScheduled; - currApproxBlocksScheduled = 0; + prevApproxBlocksScheduled.set(currApproxBlocksScheduled); + currApproxBlocksScheduled.reset(); lastBlocksScheduledRollTime = now; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 709f060d2371f..5314f09a3fd2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -348,8 +348,7 @@ private boolean isInactive(DatanodeInfo datanode) { /** Sort the located blocks by the distance to the target host. */ public void sortLocatedBlocks(final String targethost, - final List locatedblocks, - boolean randomizeBlockLocationsPerBlock) { + final List locatedblocks) { //sort the blocks // As it is possible for the separation of node manager and datanode, // here we should get node but not datanode only . 
@@ -376,8 +375,7 @@ public void sortLocatedBlocks(final String targethost, --lastActiveIndex; } int activeLen = lastActiveIndex + 1; - networktopology.sortByDistance(client, b.getLocations(), activeLen, b - .getBlock().getBlockId(), randomizeBlockLocationsPerBlock); + networktopology.sortByDistance(client, b.getLocations(), activeLen); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java index 4ddb7cc146359..58ca2ace254f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeStorageInfo.java @@ -109,7 +109,7 @@ public void remove() { private long capacity; private long dfsUsed; - private long remaining; + private volatile long remaining; private long blockPoolUsed; private volatile BlockInfo blockList = null; @@ -283,7 +283,7 @@ public DatanodeDescriptor getDatanodeDescriptor() { /** Increment the number of blocks scheduled for each given storage */ public static void incrementBlocksScheduled(DatanodeStorageInfo... 
storages) { for (DatanodeStorageInfo s : storages) { - s.getDatanodeDescriptor().incrementBlocksScheduled(); + s.getDatanodeDescriptor().incrementBlocksScheduled(s.getStorageType()); } } @@ -314,6 +314,26 @@ StorageReport toStorageReport() { false, capacity, dfsUsed, remaining, blockPoolUsed); } + static Iterable toStorageTypes( + final Iterable infos) { + return new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + final Iterator i = infos.iterator(); + @Override + public boolean hasNext() {return i.hasNext();} + @Override + public StorageType next() {return i.next().getStorageType();} + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } + /** @return the first {@link DatanodeStorageInfo} corresponding to * the given datanode */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java index 98c6398c2cb9a..767c1b559f9c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/HdfsServerConstants.java @@ -94,7 +94,12 @@ static public enum StartupOption{ NONINTERACTIVE("-nonInteractive"), RENAMERESERVED("-renameReserved"), METADATAVERSION("-metadataVersion"), - UPGRADEONLY("-upgradeOnly"); + UPGRADEONLY("-upgradeOnly"), + // The -hotswap constant should not be used as a startup option, it is + // only used for StorageDirectory.analyzeStorage() in hot swap drive scenario. + // TODO refactor StorageDirectory.analyzeStorage() so that we can do away with + // this in StartupOption. 
+ HOTSWAP("-hotswap"); private static final Pattern ENUM_WITH_ROLLING_UPGRADE_OPTION = Pattern.compile( "(\\w+)\\((\\w+)\\)"); @@ -299,5 +304,6 @@ static public enum BlockUCState { "raw.hdfs.crypto.encryption.zone"; public static final String CRYPTO_XATTR_FILE_ENCRYPTION_INFO = "raw.hdfs.crypto.file.encryption.info"; + public static final String SECURITY_XATTR_UNREADABLE_BY_SUPERUSER = + "security.hdfs.unreadable.by.superuser"; } - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index 1d68727b1ff40..7933feddeda1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -464,17 +464,20 @@ public File getPreviousCheckpoint() { public StorageState analyzeStorage(StartupOption startOpt, Storage storage) throws IOException { assert root != null : "root is null"; + boolean hadMkdirs = false; String rootPath = root.getCanonicalPath(); try { // check that storage exists if (!root.exists()) { // storage directory does not exist - if (startOpt != StartupOption.FORMAT) { + if (startOpt != StartupOption.FORMAT && + startOpt != StartupOption.HOTSWAP) { LOG.warn("Storage directory " + rootPath + " does not exist"); return StorageState.NON_EXISTENT; } LOG.info(rootPath + " does not exist. Creating ..."); if (!root.mkdirs()) throw new IOException("Cannot create directory " + rootPath); + hadMkdirs = true; } // or is inaccessible if (!root.isDirectory()) { @@ -492,7 +495,10 @@ public StorageState analyzeStorage(StartupOption startOpt, Storage storage) this.lock(); // lock storage if it exists - if (startOpt == HdfsServerConstants.StartupOption.FORMAT) + // If startOpt is HOTSWAP, it returns NOT_FORMATTED for empty directory, + // while it also checks the layout version. 
+ if (startOpt == HdfsServerConstants.StartupOption.FORMAT || + (startOpt == StartupOption.HOTSWAP && hadMkdirs)) return StorageState.NOT_FORMATTED; if (startOpt != HdfsServerConstants.StartupOption.IMPORT) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java index 4a36472cb0025..31276825603b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java @@ -48,6 +48,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATA_ENCRYPTION_ALGORITHM_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_RESTART_REPLICA_EXPIRY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_RESTART_REPLICA_EXPIRY_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_DEFAULT; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -90,6 +92,7 @@ public class DNConf { final String encryptionAlgorithm; final SaslPropertiesResolver saslPropsResolver; final TrustedChannelResolver trustedChannelResolver; + private final boolean ignoreSecurePortsForTesting; final long xceiverStopTimeout; final long restartReplicaExpiry; @@ -173,6 +176,9 @@ public DNConf(Configuration conf) { this.trustedChannelResolver = TrustedChannelResolver.getInstance(conf); this.saslPropsResolver = DataTransferSaslUtil.getSaslPropertiesResolver( conf); + this.ignoreSecurePortsForTesting = conf.getBoolean( + IGNORE_SECURE_PORTS_FOR_TESTING_KEY, + IGNORE_SECURE_PORTS_FOR_TESTING_DEFAULT); this.xceiverStopTimeout = conf.getLong( DFS_DATANODE_XCEIVER_STOP_TIMEOUT_MILLIS_KEY, @@ -238,4 +244,15 @@ public 
SaslPropertiesResolver getSaslPropsResolver() { public TrustedChannelResolver getTrustedChannelResolver() { return trustedChannelResolver; } + + /** + * Returns true if configuration is set to skip checking for proper + * port configuration in a secured cluster. This is only intended for use in + * dev testing. + * + * @return true if configured to skip checking secured port configuration + */ + public boolean getIgnoreSecurePortsForTesting() { + return ignoreSecurePortsForTesting; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 381062171b0b2..b1ef18673ae3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ADMIN; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_ADDRESS_KEY; @@ -46,9 +44,12 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATA_TRANSFER_PROTECTION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY; import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY; import static org.apache.hadoop.util.ExitUtil.terminate; import java.io.BufferedOutputStream; @@ -70,8 +71,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -80,11 +83,13 @@ import javax.management.ObjectName; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.conf.ReconfigurableBase; +import org.apache.hadoop.conf.ReconfigurationException; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; @@ -137,6 +142,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.JspHelper; +import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; @@ -165,6 +171,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.AccessControlException; +import 
org.apache.hadoop.security.SaslPropertiesResolver; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; @@ -220,7 +227,7 @@ * **********************************************************/ @InterfaceAudience.Private -public class DataNode extends Configured +public class DataNode extends ReconfigurableBase implements InterDatanodeProtocol, ClientDatanodeProtocol, DataNodeMXBean { public static final Log LOG = LogFactory.getLog(DataNode.class); @@ -305,6 +312,7 @@ public static InetSocketAddress createSocketAddr(String target) { private JvmPauseMonitor pauseMonitor; private SecureResources secureResources = null; + // dataDirs must be accessed while holding the DataNode lock. private List dataDirs; private Configuration conf; private final String confVersion; @@ -386,6 +394,149 @@ public static InetSocketAddress createSocketAddr(String target) { } } + @Override + public void reconfigurePropertyImpl(String property, String newVal) + throws ReconfigurationException { + if (property.equals(DFS_DATANODE_DATA_DIR_KEY)) { + try { + LOG.info("Reconfiguring " + property + " to " + newVal); + this.refreshVolumes(newVal); + } catch (Exception e) { + throw new ReconfigurationException(property, newVal, + getConf().get(property), e); + } + } else { + throw new ReconfigurationException( + property, newVal, getConf().get(property)); + } + } + + /** + * Get a list of the keys of the re-configurable properties in configuration. + */ + @Override + public Collection getReconfigurableProperties() { + List reconfigurable = + Collections.unmodifiableList(Arrays.asList(DFS_DATANODE_DATA_DIR_KEY)); + return reconfigurable; + } + + /** + * Contains the StorageLocations for changed data volumes. 
+ */ + @VisibleForTesting + static class ChangedVolumes { + List newLocations = Lists.newArrayList(); + List deactivateLocations = Lists.newArrayList(); + } + + /** + * Parse the new DFS_DATANODE_DATA_DIR value in the configuration to detect + * changed volumes. + * @return changed volumes. + * @throws IOException if none of the directories are specified in the + * configuration. + */ + @VisibleForTesting + ChangedVolumes parseChangedVolumes() throws IOException { + List locations = getStorageLocations(getConf()); + + if (locations.isEmpty()) { + throw new IOException("No directory is specified."); + } + + ChangedVolumes results = new ChangedVolumes(); + results.newLocations.addAll(locations); + + for (Iterator it = storage.dirIterator(); + it.hasNext(); ) { + Storage.StorageDirectory dir = it.next(); + boolean found = false; + for (Iterator sl = results.newLocations.iterator(); + sl.hasNext(); ) { + if (sl.next().getFile().getCanonicalPath().equals( + dir.getRoot().getCanonicalPath())) { + sl.remove(); + found = true; + break; + } + } + + if (!found) { + results.deactivateLocations.add( + StorageLocation.parse(dir.getRoot().toString())); + } + } + + return results; + } + + /** + * Attempts to reload data volumes with new configuration. + * @param newVolumes a comma separated string that specifies the data volumes. 
+ * @throws Exception + */ + private synchronized void refreshVolumes(String newVolumes) throws Exception { + Configuration conf = getConf(); + String oldVolumes = conf.get(DFS_DATANODE_DATA_DIR_KEY); + conf.set(DFS_DATANODE_DATA_DIR_KEY, newVolumes); + List locations = getStorageLocations(conf); + + final int numOldDataDirs = dataDirs.size(); + dataDirs = locations; + ChangedVolumes changedVolumes = parseChangedVolumes(); + + try { + if (numOldDataDirs + changedVolumes.newLocations.size() - + changedVolumes.deactivateLocations.size() <= 0) { + throw new IOException("Attempt to remove all volumes."); + } + if (!changedVolumes.newLocations.isEmpty()) { + LOG.info("Adding new volumes: " + + Joiner.on(",").join(changedVolumes.newLocations)); + + // Add volumes for each Namespace + for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { + NamespaceInfo nsInfo = bpos.getNamespaceInfo(); + LOG.info("Loading volumes for namesapce: " + nsInfo.getNamespaceID()); + storage.addStorageLocations( + this, nsInfo, changedVolumes.newLocations, StartupOption.HOTSWAP); + } + List bpids = Lists.newArrayList(); + for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { + bpids.add(bpos.getBlockPoolId()); + } + List succeedVolumes = + data.addVolumes(changedVolumes.newLocations, bpids); + + if (succeedVolumes.size() < changedVolumes.newLocations.size()) { + List failedVolumes = Lists.newArrayList(); + // Clean all failed volumes. 
+ for (StorageLocation location : changedVolumes.newLocations) { + if (!succeedVolumes.contains(location)) { + failedVolumes.add(location); + } + } + storage.removeVolumes(failedVolumes); + data.removeVolumes(failedVolumes); + } + } + + if (!changedVolumes.deactivateLocations.isEmpty()) { + LOG.info("Deactivating volumes: " + + Joiner.on(",").join(changedVolumes.deactivateLocations)); + + data.removeVolumes(changedVolumes.deactivateLocations); + storage.removeVolumes(changedVolumes.deactivateLocations); + } + } catch (IOException e) { + LOG.warn("There is IOException when refreshing volumes! " + + "Recover configurations: " + DFS_DATANODE_DATA_DIR_KEY + + " = " + oldVolumes, e); + throw e; + } + } + private synchronized void setClusterId(final String nsCid, final String bpid ) throws IOException { if(clusterId != null && !clusterId.equals(nsCid)) { @@ -818,13 +969,15 @@ void startDataNode(Configuration conf, SecureResources resources ) throws IOException { - checkSecureConfig(conf, resources); - // settings global for all BPs in the Data Node this.secureResources = resources; - this.dataDirs = dataDirs; + synchronized (this) { + this.dataDirs = dataDirs; + } this.conf = conf; this.dnConf = new DNConf(conf); + checkSecureConfig(dnConf, conf, resources); + this.spanReceiverHost = SpanReceiverHost.getInstance(conf); if (dnConf.maxLockedMemory > 0) { @@ -880,10 +1033,7 @@ void startDataNode(Configuration conf, // exit without having to explicitly shutdown its thread pool. 
readaheadPool = ReadaheadPool.getInstance(); saslClient = new SaslDataTransferClient(dnConf.saslPropsResolver, - dnConf.trustedChannelResolver, - conf.getBoolean( - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT)); + dnConf.trustedChannelResolver); saslServer = new SaslDataTransferServer(dnConf, blockPoolTokenSecretManager); } @@ -903,23 +1053,24 @@ void startDataNode(Configuration conf, * must check if the target port is a privileged port, and if so, skip the * SASL handshake. * + * @param dnConf DNConf to check * @param conf Configuration to check * @param resources SecuredResources obtained for DataNode * @throws RuntimeException if security enabled, but configuration is insecure */ - private static void checkSecureConfig(Configuration conf, + private static void checkSecureConfig(DNConf dnConf, Configuration conf, SecureResources resources) throws RuntimeException { if (!UserGroupInformation.isSecurityEnabled()) { return; } - String dataTransferProtection = conf.get(DFS_DATA_TRANSFER_PROTECTION_KEY); - if (resources != null && dataTransferProtection == null) { + SaslPropertiesResolver saslPropsResolver = dnConf.getSaslPropsResolver(); + if (resources != null && saslPropsResolver == null) { return; } - if (conf.getBoolean("ignore.secure.ports.for.testing", false)) { + if (dnConf.getIgnoreSecurePortsForTesting()) { return; } - if (dataTransferProtection != null && + if (saslPropsResolver != null && DFSUtil.getHttpPolicy(conf) == HttpConfig.Policy.HTTPS_ONLY && resources == null) { return; @@ -1112,7 +1263,9 @@ private void initStorage(final NamespaceInfo nsInfo) throws IOException { } final String bpid = nsInfo.getBlockPoolID(); //read storage info, lock data dirs and transition fs state if necessary - storage.recoverTransitionRead(this, bpid, nsInfo, dataDirs, startOpt); + synchronized (this) { + storage.recoverTransitionRead(this, bpid, nsInfo, dataDirs, startOpt); + } final StorageInfo bpStorage = 
storage.getBPStorage(bpid); LOG.info("Setting up storage: nsid=" + bpStorage.getNamespaceID() + ";bpid=" + bpid + ";lv=" + storage.getLayoutVersion() @@ -1744,7 +1897,9 @@ private class DataTransfer implements Runnable { + b + " (numBytes=" + b.getNumBytes() + ")" + ", stage=" + stage + ", clientname=" + clientname - + ", targets=" + Arrays.asList(targets)); + + ", targets=" + Arrays.asList(targets) + + ", target storage types=" + (targetStorageTypes == null ? "[]" : + Arrays.asList(targetStorageTypes))); } this.targets = targets; this.targetStorageTypes = targetStorageTypes; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java index 4383e56153a6b..965b6554f91d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataStorage.java @@ -375,6 +375,13 @@ synchronized void removeVolumes(Collection locations) { StorageDirectory sd = it.next(); if (dataDirs.contains(sd.getRoot())) { it.remove(); + try { + sd.unlock(); + } catch (IOException e) { + LOG.warn(String.format( + "I/O error attempting to unlock storage directory %s.", + sd.getRoot()), e); + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java index 553208eeafdae..4c03151e3b11a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java @@ -94,8 +94,8 @@ public RollingLogs createRollingLogs(String bpid, String prefix public 
List getVolumes(); /** Add an array of StorageLocation to FsDataset. */ - public void addVolumes(Collection volumes) - throws IOException; + public List addVolumes(List volumes, + final Collection bpids); /** Removes a collection of volumes from FsDataset. */ public void removeVolumes(Collection volumes); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 05c99143c8a29..6e8108206d02d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -28,19 +28,23 @@ import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Executor; import javax.management.NotCompliantMBeanException; import javax.management.ObjectName; import javax.management.StandardMBean; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -85,6 +89,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo; import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.io.MultipleIOException; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.util.DataChecksum; @@ 
-245,7 +250,7 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b) + ", volume failures tolerated: " + volFailuresTolerated); } - storageMap = new HashMap(); + storageMap = new ConcurrentHashMap(); volumeMap = new ReplicaMap(this); @SuppressWarnings("unchecked") final VolumeChoosingPolicy blockChooserImpl = @@ -275,45 +280,124 @@ private void addVolume(Collection dataLocations, // storageMap and asyncDiskService, consistent. FsVolumeImpl fsVolume = new FsVolumeImpl( this, sd.getStorageUuid(), dir, this.conf, storageType); - fsVolume.getVolumeMap(volumeMap); + ReplicaMap tempVolumeMap = new ReplicaMap(this); + fsVolume.getVolumeMap(tempVolumeMap); + volumeMap.addAll(tempVolumeMap); volumes.addVolume(fsVolume); storageMap.put(sd.getStorageUuid(), new DatanodeStorage(sd.getStorageUuid(), - DatanodeStorage.State.NORMAL, - storageType)); + DatanodeStorage.State.NORMAL, + storageType)); asyncDiskService.addVolume(sd.getCurrentDir()); LOG.info("Added volume - " + dir + ", StorageType: " + storageType); } + private void addVolumeAndBlockPool(Collection dataLocations, + Storage.StorageDirectory sd, final Collection bpids) + throws IOException { + final File dir = sd.getCurrentDir(); + final StorageType storageType = + getStorageTypeFromLocations(dataLocations, sd.getRoot()); + + final FsVolumeImpl fsVolume = new FsVolumeImpl( + this, sd.getStorageUuid(), dir, this.conf, storageType); + final ReplicaMap tempVolumeMap = new ReplicaMap(fsVolume); + + List exceptions = Lists.newArrayList(); + for (final String bpid : bpids) { + try { + fsVolume.addBlockPool(bpid, this.conf); + fsVolume.getVolumeMap(bpid, tempVolumeMap); + } catch (IOException e) { + LOG.warn("Caught exception when adding " + fsVolume + + ". Will throw later.", e); + exceptions.add(e); + } + } + if (!exceptions.isEmpty()) { + // The states of FsDatasetImpl are not modified, thus no need to roll back.
+ throw MultipleIOException.createIOException(exceptions); + } + + volumeMap.addAll(tempVolumeMap); + storageMap.put(sd.getStorageUuid(), + new DatanodeStorage(sd.getStorageUuid(), + DatanodeStorage.State.NORMAL, + storageType)); + asyncDiskService.addVolume(sd.getCurrentDir()); + volumes.addVolume(fsVolume); + + LOG.info("Added volume - " + dir + ", StorageType: " + storageType); + } + /** * Add an array of StorageLocation to FsDataset. * * @pre dataStorage must have these volumes. - * @param volumes - * @throws IOException + * @param volumes an array of storage locations for adding volumes. + * @param bpids block pool IDs. + * @return an array of successfully loaded volumes. */ @Override - public synchronized void addVolumes(Collection volumes) - throws IOException { + public synchronized List addVolumes( + final List volumes, final Collection bpids) { final Collection dataLocations = DataNode.getStorageLocations(this.conf); - Map allStorageDirs = + final Map allStorageDirs = new HashMap(); - for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) { - Storage.StorageDirectory sd = dataStorage.getStorageDir(idx); - allStorageDirs.put(sd.getRoot().getAbsolutePath(), sd); + List succeedVolumes = Lists.newArrayList(); + try { + for (int idx = 0; idx < dataStorage.getNumStorageDirs(); idx++) { + Storage.StorageDirectory sd = dataStorage.getStorageDir(idx); + allStorageDirs.put(sd.getRoot().getCanonicalPath(), sd); + } + } catch (IOException ioe) { + LOG.warn("Caught exception when parsing storage URL.", ioe); + return succeedVolumes; + } + + final boolean[] successFlags = new boolean[volumes.size()]; + Arrays.fill(successFlags, false); + List volumeAddingThreads = Lists.newArrayList(); + for (int i = 0; i < volumes.size(); i++) { + final int idx = i; + Thread t = new Thread() { + public void run() { + StorageLocation vol = volumes.get(idx); + try { + String key = vol.getFile().getCanonicalPath(); + if (!allStorageDirs.containsKey(key)) { + LOG.warn("Attempt to 
add an invalid volume: " + vol.getFile()); + } else { + addVolumeAndBlockPool(dataLocations, allStorageDirs.get(key), + bpids); + successFlags[idx] = true; + } + } catch (IOException e) { + LOG.warn("Caught exception when adding volume " + vol, e); + } + } + }; + volumeAddingThreads.add(t); + t.start(); } - for (StorageLocation vol : volumes) { - String key = vol.getFile().getAbsolutePath(); - if (!allStorageDirs.containsKey(key)) { - LOG.warn("Attempt to add an invalid volume: " + vol.getFile()); - } else { - addVolume(dataLocations, allStorageDirs.get(key)); + for (Thread t : volumeAddingThreads) { + try { + t.join(); + } catch (InterruptedException e) { + LOG.warn("Caught InterruptedException when adding volume.", e); } } + + for (int i = 0; i < volumes.size(); i++) { + if (successFlags[i]) { + succeedVolumes.add(volumes.get(i)); + } + } + return succeedVolumes; } /** @@ -335,9 +419,9 @@ public synchronized void removeVolumes(Collection volumes) { String volume = sd.getRoot().toString(); LOG.info("Removing " + volume + " from FsDataset."); - this.volumes.removeVolume(volume); - storageMap.remove(sd.getStorageUuid()); + // Disable the volume from the service. asyncDiskService.removeVolume(sd.getCurrentDir()); + this.volumes.removeVolume(volume); // Removed all replica information for the blocks on the volume. 
Unlike // updating the volumeMap in addVolume(), this operation does not scan @@ -348,7 +432,7 @@ public synchronized void removeVolumes(Collection volumes) { it.hasNext(); ) { ReplicaInfo block = it.next(); if (block.getVolume().getBasePath().equals(volume)) { - invalidate(bpid, block.getBlockId()); + invalidate(bpid, block); blocks.add(block); it.remove(); } @@ -357,6 +441,8 @@ public synchronized void removeVolumes(Collection volumes) { datanode.getBlockScanner().deleteBlocks(bpid, blocks.toArray(new Block[blocks.size()])); } + + storageMap.remove(sd.getStorageUuid()); } } } @@ -818,7 +904,7 @@ public synchronized ReplicaInPipeline recoverAppend(ExtendedBlock b, } @Override // FsDatasetSpi - public String recoverClose(ExtendedBlock b, long newGS, + public synchronized String recoverClose(ExtendedBlock b, long newGS, long expectedBlockLen) throws IOException { LOG.info("Recover failed close " + b); // check replica's state @@ -1004,9 +1090,17 @@ public synchronized ReplicaInPipeline createTemporary(StorageType storageType, ExtendedBlock b) throws IOException { ReplicaInfo replicaInfo = volumeMap.get(b.getBlockPoolId(), b.getBlockId()); if (replicaInfo != null) { - throw new ReplicaAlreadyExistsException("Block " + b + - " already exists in state " + replicaInfo.getState() + - " and thus cannot be created."); + if (replicaInfo.getGenerationStamp() < b.getGenerationStamp() + && replicaInfo instanceof ReplicaInPipeline) { + // Stop the previous writer + ((ReplicaInPipeline)replicaInfo) + .stopWriter(datanode.getDnConf().getXceiverStopTimeout()); + invalidate(b.getBlockPoolId(), new Block[]{replicaInfo}); + } else { + throw new ReplicaAlreadyExistsException("Block " + b + + " already exists in state " + replicaInfo.getState() + + " and thus cannot be created."); + } } FsVolumeImpl v = volumes.getNextVolume(storageType, b.getNumBytes()); @@ -1345,23 +1439,26 @@ public void invalidate(String bpid, Block invalidBlks[]) throws IOException { /** * Invalidate a block but 
does not delete the actual on-disk block file. * - * It should only be used for decommissioning disks. + * It should only be used when deactivating disks. * * @param bpid the block pool ID. - * @param blockId the ID of the block. + * @param block The block to be invalidated. */ - public void invalidate(String bpid, long blockId) { + public void invalidate(String bpid, ReplicaInfo block) { // If a DFSClient has the replica in its cache of short-circuit file // descriptors (and the client is using ShortCircuitShm), invalidate it. // The short-circuit registry is null in the unit tests, because the // datanode is mock object. if (datanode.getShortCircuitRegistry() != null) { datanode.getShortCircuitRegistry().processBlockInvalidation( - new ExtendedBlockId(blockId, bpid)); + new ExtendedBlockId(block.getBlockId(), bpid)); // If the block is cached, start uncaching it. - cacheManager.uncacheBlock(bpid, blockId); + cacheManager.uncacheBlock(bpid, block.getBlockId()); } + + datanode.notifyNamenodeDeletedBlock(new ExtendedBlock(bpid, block), + block.getStorageUuid()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index 3952c39159f70..63bc6a1bced46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -69,7 +69,7 @@ public class FsVolumeImpl implements FsVolumeSpi { // Capacity configured. This is useful when we want to // limit the visible capacity for tests. If negative, then we just // query from the filesystem. - protected long configuredCapacity; + protected volatile long configuredCapacity; /** * Per-volume worker pool that processes new blocks to cache. 
@@ -129,7 +129,7 @@ void decDfsUsed(String bpid, long value) { } } } - + long getDfsUsed() throws IOException { long dfsUsed = 0; synchronized(dataset) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java new file mode 100644 index 0000000000000..858db1d6846e6 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java @@ -0,0 +1,650 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.mover; + +import com.google.common.annotations.VisibleForTesting; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.commons.cli.*; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.*; +import org.apache.hadoop.hdfs.protocol.*; +import org.apache.hadoop.hdfs.server.balancer.Dispatcher; +import org.apache.hadoop.hdfs.server.balancer.Dispatcher.*; +import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup; +import org.apache.hadoop.hdfs.server.balancer.ExitStatus; +import org.apache.hadoop.hdfs.server.balancer.Matcher; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.net.URI; +import java.text.DateFormat; +import java.util.*; + +@InterfaceAudience.Private +public class Mover { + static final Log LOG = LogFactory.getLog(Mover.class); + + static final Path MOVER_ID_PATH = new Path("/system/mover.id"); + + private static class StorageMap { + private final StorageGroupMap sources + = new StorageGroupMap(); + private final StorageGroupMap targets + = new StorageGroupMap(); + private final EnumMap> targetStorageTypeMap + = new EnumMap>(StorageType.class); + + private 
StorageMap() { + for(StorageType t : StorageType.asList()) { + targetStorageTypeMap.put(t, new LinkedList()); + } + } + + private void add(Source source, StorageGroup target) { + sources.put(source); + if (target != null) { + targets.put(target); + getTargetStorages(target.getStorageType()).add(target); + } + } + + private Source getSource(MLocation ml) { + return get(sources, ml); + } + + private StorageGroup getTarget(MLocation ml) { + return get(targets, ml); + } + + private static G get(StorageGroupMap map, MLocation ml) { + return map.get(ml.datanode.getDatanodeUuid(), ml.storageType); + } + + private List getTargetStorages(StorageType t) { + return targetStorageTypeMap.get(t); + } + } + + private final Dispatcher dispatcher; + private final StorageMap storages; + private final List targetPaths; + + private final BlockStoragePolicy.Suite blockStoragePolicies; + + Mover(NameNodeConnector nnc, Configuration conf) { + final long movedWinWidth = conf.getLong( + DFSConfigKeys.DFS_MOVER_MOVEDWINWIDTH_KEY, + DFSConfigKeys.DFS_MOVER_MOVEDWINWIDTH_DEFAULT); + final int moverThreads = conf.getInt( + DFSConfigKeys.DFS_MOVER_MOVERTHREADS_KEY, + DFSConfigKeys.DFS_MOVER_MOVERTHREADS_DEFAULT); + final int maxConcurrentMovesPerNode = conf.getInt( + DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY, + DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT); + + this.dispatcher = new Dispatcher(nnc, Collections. emptySet(), + Collections. 
emptySet(), movedWinWidth, moverThreads, 0, + maxConcurrentMovesPerNode, conf); + this.storages = new StorageMap(); + this.blockStoragePolicies = BlockStoragePolicy.readBlockStorageSuite(conf); + this.targetPaths = nnc.getTargetPaths(); + } + + void init() throws IOException { + final List reports = dispatcher.init(); + for(DatanodeStorageReport r : reports) { + final DDatanode dn = dispatcher.newDatanode(r.getDatanodeInfo()); + for(StorageType t : StorageType.asList()) { + final Source source = dn.addSource(t, Long.MAX_VALUE, dispatcher); + final long maxRemaining = getMaxRemaining(r, t); + final StorageGroup target = maxRemaining > 0L ? dn.addTarget(t, + maxRemaining) : null; + storages.add(source, target); + } + } + } + + private ExitStatus run() { + try { + init(); + boolean hasRemaining = new Processor().processNamespace(); + return hasRemaining ? ExitStatus.IN_PROGRESS : ExitStatus.SUCCESS; + } catch (IllegalArgumentException e) { + System.out.println(e + ". Exiting ..."); + return ExitStatus.ILLEGAL_ARGUMENTS; + } catch (IOException e) { + System.out.println(e + ". Exiting ..."); + return ExitStatus.IO_EXCEPTION; + } finally { + dispatcher.shutdownNow(); + } + } + + DBlock newDBlock(Block block, List locations) { + final DBlock db = new DBlock(block); + for(MLocation ml : locations) { + StorageGroup source = storages.getSource(ml); + if (source != null) { + db.addLocation(source); + } + } + return db; + } + + private static long getMaxRemaining(DatanodeStorageReport report, StorageType t) { + long max = 0L; + for(StorageReport r : report.getStorageReports()) { + if (r.getStorage().getStorageType() == t) { + if (r.getRemaining() > max) { + max = r.getRemaining(); + } + } + } + return max; + } + + /** + * convert a snapshot path to non-snapshot path. 
E.g., + * /foo/.snapshot/snapshot-name/bar --> /foo/bar + */ + private static String convertSnapshotPath(String[] pathComponents) { + StringBuilder sb = new StringBuilder(Path.SEPARATOR); + for (int i = 0; i < pathComponents.length; i++) { + if (pathComponents[i].equals(HdfsConstants.DOT_SNAPSHOT_DIR)) { + i++; + } else { + sb.append(pathComponents[i]); + } + } + return sb.toString(); + } + + class Processor { + private final DFSClient dfs; + private final List snapshottableDirs = new ArrayList(); + + Processor() { + dfs = dispatcher.getDistributedFileSystem().getClient(); + } + + private void getSnapshottableDirs() { + SnapshottableDirectoryStatus[] dirs = null; + try { + dirs = dfs.getSnapshottableDirListing(); + } catch (IOException e) { + LOG.warn("Failed to get snapshottable directories." + + " Ignore and continue.", e); + } + if (dirs != null) { + for (SnapshottableDirectoryStatus dir : dirs) { + snapshottableDirs.add(dir.getFullPath().toString()); + } + } + } + + /** + * @return true if the given path is a snapshot path and the corresponding + * INode is still in the current fsdirectory. 
+ */ + private boolean isSnapshotPathInCurrent(String path) throws IOException { + // if the parent path contains "/.snapshot/", this is a snapshot path + if (path.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) { + String[] pathComponents = INode.getPathNames(path); + if (HdfsConstants.DOT_SNAPSHOT_DIR + .equals(pathComponents[pathComponents.length - 2])) { + // this is a path for a specific snapshot (e.g., /foo/.snapshot/s1) + return false; + } + String nonSnapshotPath = convertSnapshotPath(pathComponents); + return dfs.getFileInfo(nonSnapshotPath) != null; + } else { + return false; + } + } + + /** + * @return whether there is still remaining migration work for the next + * round + */ + private boolean processNamespace() { + getSnapshottableDirs(); + boolean hasRemaining = true; + try { + for (Path target : targetPaths) { + hasRemaining = processDirRecursively("", dfs.getFileInfo(target + .toUri().getPath())); + } + } catch (IOException e) { + LOG.warn("Failed to get root directory status. Ignore and continue.", e); + } + // wait for pending move to finish and retry the failed migration + hasRemaining |= Dispatcher.waitForMoveCompletion(storages.targets.values()); + return hasRemaining; + } + + /** + * @return whether there is still remaining migration work for the next + * round + */ + private boolean processChildrenList(String fullPath) { + boolean hasRemaining = false; + for (byte[] lastReturnedName = HdfsFileStatus.EMPTY_NAME;;) { + final DirectoryListing children; + try { + children = dfs.listPaths(fullPath, lastReturnedName, true); + } catch(IOException e) { + LOG.warn("Failed to list directory " + fullPath + + ". 
Ignore the directory and continue.", e); + return hasRemaining; + } + if (children == null) { + return hasRemaining; + } + for (HdfsFileStatus child : children.getPartialListing()) { + hasRemaining |= processDirRecursively(fullPath, child); + } + if (children.hasMore()) { + lastReturnedName = children.getLastName(); + } else { + return hasRemaining; + } + } + } + + /** @return whether the migration requires next round */ + private boolean processDirRecursively(String parent, + HdfsFileStatus status) { + String fullPath = status.getFullName(parent); + boolean hasRemaining = false; + if (status.isDir()) { + if (!fullPath.endsWith(Path.SEPARATOR)) { + fullPath = fullPath + Path.SEPARATOR; + } + + hasRemaining = processChildrenList(fullPath); + // process snapshots if this is a snapshottable directory + if (snapshottableDirs.contains(fullPath)) { + final String dirSnapshot = fullPath + HdfsConstants.DOT_SNAPSHOT_DIR; + hasRemaining |= processChildrenList(dirSnapshot); + } + } else if (!status.isSymlink()) { // file + try { + if (!isSnapshotPathInCurrent(fullPath)) { + // the full path is a snapshot path but it is also included in the + // current directory tree, thus ignore it. + hasRemaining = processFile((HdfsLocatedFileStatus)status); + } + } catch (IOException e) { + LOG.warn("Failed to check the status of " + parent + + ". 
Ignore it and continue.", e); + return false; + } + } + return hasRemaining; + } + + /** @return true if it is necessary to run another round of migration */ + private boolean processFile(HdfsLocatedFileStatus status) { + final BlockStoragePolicy policy = blockStoragePolicies.getPolicy( + status.getStoragePolicy()); + final List types = policy.chooseStorageTypes( + status.getReplication()); + + final LocatedBlocks locatedBlocks = status.getBlockLocations(); + boolean hasRemaining = false; + final boolean lastBlkComplete = locatedBlocks.isLastBlockComplete(); + List lbs = locatedBlocks.getLocatedBlocks(); + for(int i = 0; i < lbs.size(); i++) { + if (i == lbs.size() - 1 && !lastBlkComplete) { + // last block is incomplete, skip it + continue; + } + LocatedBlock lb = lbs.get(i); + final StorageTypeDiff diff = new StorageTypeDiff(types, + lb.getStorageTypes()); + if (!diff.removeOverlap()) { + if (scheduleMoves4Block(diff, lb)) { + hasRemaining |= (diff.existing.size() > 1 && + diff.expected.size() > 1); + } + } + } + return hasRemaining; + } + + boolean scheduleMoves4Block(StorageTypeDiff diff, LocatedBlock lb) { + final List locations = MLocation.toLocations(lb); + Collections.shuffle(locations); + final DBlock db = newDBlock(lb.getBlock().getLocalBlock(), locations); + + for (final StorageType t : diff.existing) { + for (final MLocation ml : locations) { + final Source source = storages.getSource(ml); + if (ml.storageType == t && source != null) { + // try to schedule one replica move. + if (scheduleMoveReplica(db, source, diff.expected)) { + return true; + } + } + } + } + return false; + } + + @VisibleForTesting + boolean scheduleMoveReplica(DBlock db, MLocation ml, + List targetTypes) { + final Source source = storages.getSource(ml); + return source == null ? 
false : scheduleMoveReplica(db, + storages.getSource(ml), targetTypes); + } + + boolean scheduleMoveReplica(DBlock db, Source source, + List targetTypes) { + if (dispatcher.getCluster().isNodeGroupAware()) { + if (chooseTarget(db, source, targetTypes, Matcher.SAME_NODE_GROUP)) { + return true; + } + } + + // Then, match nodes on the same rack + if (chooseTarget(db, source, targetTypes, Matcher.SAME_RACK)) { + return true; + } + // At last, match all remaining nodes + return chooseTarget(db, source, targetTypes, Matcher.ANY_OTHER); + } + + boolean chooseTarget(DBlock db, Source source, + List targetTypes, Matcher matcher) { + final NetworkTopology cluster = dispatcher.getCluster(); + for (StorageType t : targetTypes) { + for(StorageGroup target : storages.getTargetStorages(t)) { + if (matcher.match(cluster, source.getDatanodeInfo(), + target.getDatanodeInfo())) { + final PendingMove pm = source.addPendingMove(db, target); + if (pm != null) { + dispatcher.executePendingMove(pm); + return true; + } + } + } + } + return false; + } + } + + static class MLocation { + final DatanodeInfo datanode; + final StorageType storageType; + final long size; + + MLocation(DatanodeInfo datanode, StorageType storageType, long size) { + this.datanode = datanode; + this.storageType = storageType; + this.size = size; + } + + static List toLocations(LocatedBlock lb) { + final DatanodeInfo[] datanodeInfos = lb.getLocations(); + final StorageType[] storageTypes = lb.getStorageTypes(); + final long size = lb.getBlockSize(); + final List locations = new LinkedList(); + for(int i = 0; i < datanodeInfos.length; i++) { + locations.add(new MLocation(datanodeInfos[i], storageTypes[i], size)); + } + return locations; + } + } + + @VisibleForTesting + static class StorageTypeDiff { + final List expected; + final List existing; + + StorageTypeDiff(List expected, StorageType[] existing) { + this.expected = new LinkedList(expected); + this.existing = new LinkedList(Arrays.asList(existing)); + } + + /** 
+ * Remove the overlap between the expected types and the existing types. + * @return if the existing types or the expected types is empty after + * removing the overlap. + */ + boolean removeOverlap() { + for(Iterator i = existing.iterator(); i.hasNext(); ) { + final StorageType t = i.next(); + if (expected.remove(t)) { + i.remove(); + } + } + return expected.isEmpty() || existing.isEmpty(); + } + + @Override + public String toString() { + return getClass().getSimpleName() + "{expected=" + expected + + ", existing=" + existing + "}"; + } + } + + static int run(Map> namenodes, Configuration conf) + throws IOException, InterruptedException { + final long sleeptime = + conf.getLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, + DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT) * 2000 + + conf.getLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, + DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT) * 1000; + LOG.info("namenodes = " + namenodes); + + List connectors = Collections.emptyList(); + try { + connectors = NameNodeConnector.newNameNodeConnectors(namenodes, + Mover.class.getSimpleName(), MOVER_ID_PATH, conf); + + while (connectors.size() > 0) { + Collections.shuffle(connectors); + Iterator iter = connectors.iterator(); + while (iter.hasNext()) { + NameNodeConnector nnc = iter.next(); + final Mover m = new Mover(nnc, conf); + final ExitStatus r = m.run(); + + if (r == ExitStatus.SUCCESS) { + IOUtils.cleanup(LOG, nnc); + iter.remove(); + } else if (r != ExitStatus.IN_PROGRESS) { + // must be an error statue, return + return r.getExitCode(); + } + } + Thread.sleep(sleeptime); + } + return ExitStatus.SUCCESS.getExitCode(); + } finally { + for (NameNodeConnector nnc : connectors) { + IOUtils.cleanup(LOG, nnc); + } + } + } + + static class Cli extends Configured implements Tool { + private static final String USAGE = "Usage: java " + + Mover.class.getSimpleName() + " [-p | -f ]" + + "\n\t-p \ta space separated list of HDFS files/dirs to migrate." 
+ + "\n\t-f \ta local file containing a list of HDFS files/dirs to migrate."; + + private static Options buildCliOptions() { + Options opts = new Options(); + Option file = OptionBuilder.withArgName("pathsFile").hasArg() + .withDescription("a local file containing files/dirs to migrate") + .create("f"); + Option paths = OptionBuilder.withArgName("paths").hasArgs() + .withDescription("specify space separated files/dirs to migrate") + .create("p"); + OptionGroup group = new OptionGroup(); + group.addOption(file); + group.addOption(paths); + opts.addOptionGroup(group); + return opts; + } + + private static String[] readPathFile(String file) throws IOException { + List list = Lists.newArrayList(); + BufferedReader reader = new BufferedReader(new FileReader(file)); + try { + String line; + while ((line = reader.readLine()) != null) { + if (!line.trim().isEmpty()) { + list.add(line); + } + } + } finally { + IOUtils.cleanup(LOG, reader); + } + return list.toArray(new String[list.size()]); + } + + private static Map> getNameNodePaths(CommandLine line, + Configuration conf) throws Exception { + Map> map = Maps.newHashMap(); + String[] paths = null; + if (line.hasOption("f")) { + paths = readPathFile(line.getOptionValue("f")); + } else if (line.hasOption("p")) { + paths = line.getOptionValues("p"); + } + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + if (paths == null || paths.length == 0) { + for (URI namenode : namenodes) { + map.put(namenode, null); + } + return map; + } + final URI singleNs = namenodes.size() == 1 ? 
+ namenodes.iterator().next() : null; + for (String path : paths) { + Path target = new Path(path); + if (!target.isUriPathAbsolute()) { + throw new IllegalArgumentException("The path " + target + + " is not absolute"); + } + URI targetUri = target.toUri(); + if ((targetUri.getAuthority() == null || targetUri.getScheme() == + null) && singleNs == null) { + // each path must contains both scheme and authority information + // unless there is only one name service specified in the + // configuration + throw new IllegalArgumentException("The path " + target + + " does not contain scheme and authority thus cannot identify" + + " its name service"); + } + URI key = singleNs; + if (singleNs == null) { + key = new URI(targetUri.getScheme(), targetUri.getAuthority(), + null, null, null); + if (!namenodes.contains(key)) { + throw new IllegalArgumentException("Cannot resolve the path " + + target + ". The namenode services specified in the " + + "configuration: " + namenodes); + } + } + List targets = map.get(key); + if (targets == null) { + targets = Lists.newArrayList(); + map.put(key, targets); + } + targets.add(Path.getPathWithoutSchemeAndAuthority(target)); + } + return map; + } + + @VisibleForTesting + static Map> getNameNodePathsToMove(Configuration conf, + String... args) throws Exception { + final Options opts = buildCliOptions(); + CommandLineParser parser = new GnuParser(); + CommandLine commandLine = parser.parse(opts, args, true); + return getNameNodePaths(commandLine, conf); + } + + @Override + public int run(String[] args) throws Exception { + final long startTime = Time.monotonicNow(); + final Configuration conf = getConf(); + + try { + final Map> map = getNameNodePathsToMove(conf, args); + return Mover.run(map, conf); + } catch (IOException e) { + System.out.println(e + ". Exiting ..."); + return ExitStatus.IO_EXCEPTION.getExitCode(); + } catch (InterruptedException e) { + System.out.println(e + ". 
Exiting ..."); + return ExitStatus.INTERRUPTED.getExitCode(); + } catch (ParseException e) { + System.out.println(e + ". Exiting ..."); + return ExitStatus.ILLEGAL_ARGUMENTS.getExitCode(); + } catch (IllegalArgumentException e) { + System.out.println(e + ". Exiting ..."); + return ExitStatus.ILLEGAL_ARGUMENTS.getExitCode(); + } finally { + System.out.format("%-24s ", DateFormat.getDateTimeInstance().format(new Date())); + System.out.println("Mover took " + StringUtils.formatTime(Time.monotonicNow()-startTime)); + } + } + } + + /** + * Run a Mover in command line. + * + * @param args Command line arguments + */ + public static void main(String[] args) { + if (DFSUtil.parseHelpArgument(args, Cli.USAGE, System.out, true)) { + System.exit(0); + } + + try { + System.exit(ToolRunner.run(new HdfsConfiguration(), new Cli(), args)); + } catch (Throwable e) { + LOG.error("Exiting " + Mover.class.getSimpleName() + + " due to an exception", e); + System.exit(-1); + } + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java index e72ae12fcdef3..e22f8b97423c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java @@ -26,6 +26,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.XAttrSetFlag; @@ -33,6 +34,8 @@ import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.protocol.EncryptionZone; import 
org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; +import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -53,8 +56,8 @@ public class EncryptionZoneManager { public static Logger LOG = LoggerFactory.getLogger(EncryptionZoneManager .class); - private static final EncryptionZone NULL_EZ = - new EncryptionZone("", "", -1); + public static final EncryptionZone NULL_EZ = + new EncryptionZone(-1, "", CipherSuite.UNKNOWN, ""); /** * EncryptionZoneInt is the internal representation of an encryption zone. The @@ -62,21 +65,27 @@ public class EncryptionZoneManager { * contains the EZ's pathname. */ private static class EncryptionZoneInt { - private final String keyName; private final long inodeId; + private final CipherSuite suite; + private final String keyName; - EncryptionZoneInt(long inodeId, String keyName) { - this.keyName = keyName; + EncryptionZoneInt(long inodeId, CipherSuite suite, String keyName) { this.inodeId = inodeId; - } - - String getKeyName() { - return keyName; + this.suite = suite; + this.keyName = keyName; } long getINodeId() { return inodeId; } + + CipherSuite getSuite() { + return suite; + } + + String getKeyName() { + return keyName; + } } private final TreeMap encryptionZones; @@ -109,9 +118,9 @@ public EncryptionZoneManager(FSDirectory dir, Configuration conf) { * @param inodeId of the encryption zone * @param keyName encryption zone key name */ - void addEncryptionZone(Long inodeId, String keyName) { + void addEncryptionZone(Long inodeId, CipherSuite suite, String keyName) { assert dir.hasWriteLock(); - unprotectedAddEncryptionZone(inodeId, keyName); + unprotectedAddEncryptionZone(inodeId, suite, keyName); } /** @@ -122,8 +131,10 @@ void addEncryptionZone(Long inodeId, String keyName) { * @param inodeId of the encryption zone * @param keyName encryption zone key name */ - void unprotectedAddEncryptionZone(Long inodeId, 
String keyName) { - final EncryptionZoneInt ez = new EncryptionZoneInt(inodeId, keyName); + void unprotectedAddEncryptionZone(Long inodeId, + CipherSuite suite, String keyName) { + final EncryptionZoneInt ez = new EncryptionZoneInt( + inodeId, suite, keyName); encryptionZones.put(inodeId, ez); } @@ -207,8 +218,8 @@ EncryptionZone getEZINodeForPath(INodesInPath iip) { if (ezi == null) { return NULL_EZ; } else { - return new EncryptionZone(getFullPathName(ezi), ezi.getKeyName(), - ezi.getINodeId()); + return new EncryptionZone(ezi.getINodeId(), getFullPathName(ezi), + ezi.getSuite(), ezi.getKeyName()); } } @@ -264,7 +275,7 @@ void checkMoveValidity(INodesInPath srcIIP, INodesInPath dstIIP, String src) *

* Called while holding the FSDirectory lock. */ - XAttr createEncryptionZone(String src, String keyName) + XAttr createEncryptionZone(String src, CipherSuite suite, String keyName) throws IOException { assert dir.hasWriteLock(); if (dir.isNonEmptyDirectory(src)) { @@ -284,8 +295,10 @@ XAttr createEncryptionZone(String src, String keyName) "encryption zone. (" + getFullPathName(ezi) + ")"); } + final HdfsProtos.ZoneEncryptionInfoProto proto = + PBHelper.convert(suite, keyName); final XAttr ezXAttr = XAttrHelper - .buildXAttr(CRYPTO_XATTR_ENCRYPTION_ZONE, keyName.getBytes()); + .buildXAttr(CRYPTO_XATTR_ENCRYPTION_ZONE, proto.toByteArray()); final List xattrs = Lists.newArrayListWithCapacity(1); xattrs.add(ezXAttr); @@ -312,8 +325,23 @@ BatchedListEntries listEncryptionZones(long prevId) int count = 0; for (EncryptionZoneInt ezi : tailMap.values()) { - zones.add(new EncryptionZone(getFullPathName(ezi), - ezi.getKeyName(), ezi.getINodeId())); + /* + Skip EZs that are only present in snapshots. Re-resolve the path to + see if the path's current inode ID matches EZ map's INode ID. + + INode#getFullPathName simply calls getParent recursively, so will return + the INode's parents at the time it was snapshotted. It will not + contain a reference INode. 
+ */ + final String pathName = getFullPathName(ezi); + INodesInPath iip = dir.getINodesInPath(pathName, false); + INode lastINode = iip.getLastINode(); + if (lastINode == null || lastINode.getId() != ezi.getINodeId()) { + continue; + } + // Add the EZ to the result list + zones.add(new EncryptionZone(ezi.getINodeId(), pathName, + ezi.getSuite(), ezi.getKeyName())); count++; if (count >= numResponses) { break; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 836ebd23b8f8d..961808e1caf86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -20,6 +20,7 @@ import static org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_FILE_ENCRYPTION_INFO; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; import static org.apache.hadoop.util.Time.now; import java.io.Closeable; @@ -36,6 +37,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileEncryptionInfo; @@ -51,6 +53,7 @@ import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import 
org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -62,7 +65,7 @@ import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException; import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException; -import org.apache.hadoop.hdfs.protocol.FsAclPermission; +import org.apache.hadoop.hdfs.protocol.FsPermissionExtension; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; @@ -90,6 +93,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import org.apache.hadoop.security.AccessControlException; /** * Both FSDirectory and FSNamesystem manage the state of the namespace. @@ -128,6 +132,8 @@ private static INodeDirectory createRoot(FSNamesystem namesystem) { DFSUtil.string2Bytes(DOT_INODES_STRING); private final XAttr KEYID_XATTR = XAttrHelper.buildXAttr(CRYPTO_XATTR_ENCRYPTION_ZONE, null); + private final XAttr UNREADABLE_BY_SUPERUSER_XATTR = + XAttrHelper.buildXAttr(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER, null); INodeDirectory rootDir; private final FSNamesystem namesystem; @@ -269,6 +275,12 @@ void disableQuotaChecks() { skipQuotaCheck = true; } + private static INodeFile newINodeFile(long id, PermissionStatus permissions, + long mtime, long atime, short replication, long preferredBlockSize) { + return new INodeFile(id, null, permissions, mtime, atime, + BlockInfo.EMPTY_ARRAY, replication, preferredBlockSize, (byte)0); + } + /** * Add the given filename to the fs. 
* @throws FileAlreadyExistsException @@ -283,9 +295,8 @@ INodeFile addFile(String path, PermissionStatus permissions, UnresolvedLinkException, SnapshotAccessControlException, AclException { long modTime = now(); - INodeFile newNode = new INodeFile(namesystem.allocateNewInodeId(), null, - permissions, modTime, modTime, BlockInfo.EMPTY_ARRAY, replication, - preferredBlockSize); + INodeFile newNode = newINodeFile(namesystem.allocateNewInodeId(), + permissions, modTime, modTime, replication, preferredBlockSize); newNode.toUnderConstruction(clientName, clientMachine); boolean added = false; @@ -321,14 +332,13 @@ INodeFile unprotectedAddFile( long id, final INodeFile newNode; assert hasWriteLock(); if (underConstruction) { - newNode = new INodeFile(id, null, permissions, modificationTime, - modificationTime, BlockInfo.EMPTY_ARRAY, replication, - preferredBlockSize); + newNode = newINodeFile(id, permissions, modificationTime, + modificationTime, replication, preferredBlockSize); newNode.toUnderConstruction(clientName, clientMachine); } else { - newNode = new INodeFile(id, null, permissions, modificationTime, atime, - BlockInfo.EMPTY_ARRAY, replication, preferredBlockSize); + newNode = newINodeFile(id, permissions, modificationTime, atime, + replication, preferredBlockSize); } try { @@ -995,6 +1005,44 @@ Block[] unprotectedSetReplication(String src, short replication, return file.getBlocks(); } + /** Set block storage policy for a directory */ + void setStoragePolicy(String src, byte policyId) + throws IOException { + writeLock(); + try { + unprotectedSetStoragePolicy(src, policyId); + } finally { + writeUnlock(); + } + } + + void unprotectedSetStoragePolicy(String src, byte policyId) + throws IOException { + assert hasWriteLock(); + final INodesInPath iip = getINodesInPath4Write(src, true); + final INode inode = iip.getLastINode(); + if (inode == null) { + throw new FileNotFoundException("File/Directory does not exist: " + src); + } + final int snapshotId = 
iip.getLatestSnapshotId(); + if (inode.isFile()) { + inode.asFile().setStoragePolicyID(policyId, snapshotId); + } else if (inode.isDirectory()) { + setDirStoragePolicy(inode.asDirectory(), policyId, snapshotId); + } else { + throw new FileNotFoundException(src + " is not a file or directory"); + } + } + + private void setDirStoragePolicy(INodeDirectory inode, byte policyId, + int latestSnapshotId) throws IOException { + List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); + XAttr xAttr = BlockStoragePolicy.buildXAttr(policyId); + List newXAttrs = setINodeXAttrs(existingXAttrs, Arrays.asList(xAttr), + EnumSet.of(XAttrSetFlag.CREATE, XAttrSetFlag.REPLACE)); + XAttrStorage.updateINodeXAttrs(inode, newXAttrs, latestSnapshotId); + } + /** * @param path the file path * @return the block size of the file. @@ -1326,6 +1374,11 @@ private static void checkSnapshot(INode target, } } + private byte getStoragePolicyID(byte inodePolicy, byte parentPolicy) { + return inodePolicy != BlockStoragePolicy.ID_UNSPECIFIED ? 
inodePolicy : + parentPolicy; + } + /** * Get a partial listing of the indicated directory * @@ -1340,7 +1393,8 @@ private static void checkSnapshot(INode target, * @return a partial listing starting after startAfter */ DirectoryListing getListing(String src, byte[] startAfter, - boolean needLocation) throws UnresolvedLinkException, IOException { + boolean needLocation, boolean isSuperUser) + throws UnresolvedLinkException, IOException { String srcs = normalizePath(src); final boolean isRawPath = isReservedRawName(src); @@ -1349,16 +1403,20 @@ DirectoryListing getListing(String src, byte[] startAfter, if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) { return getSnapshotsListing(srcs, startAfter); } - final INodesInPath inodesInPath = getLastINodeInPath(srcs, true); + final INodesInPath inodesInPath = getINodesInPath(srcs, true); + final INode[] inodes = inodesInPath.getINodes(); final int snapshot = inodesInPath.getPathSnapshotId(); - final INode targetNode = inodesInPath.getINode(0); + final INode targetNode = inodes[inodes.length - 1]; if (targetNode == null) return null; + byte parentStoragePolicy = isSuperUser ? + targetNode.getStoragePolicyID() : BlockStoragePolicy.ID_UNSPECIFIED; if (!targetNode.isDirectory()) { return new DirectoryListing( new HdfsFileStatus[]{createFileStatus(HdfsFileStatus.EMPTY_NAME, - targetNode, needLocation, snapshot, isRawPath)}, 0); + targetNode, needLocation, parentStoragePolicy, snapshot, + isRawPath, inodesInPath)}, 0); } final INodeDirectory dirInode = targetNode.asDirectory(); @@ -1371,8 +1429,11 @@ DirectoryListing getListing(String src, byte[] startAfter, HdfsFileStatus listing[] = new HdfsFileStatus[numOfListing]; for (int i=0; i0; i++) { INode cur = contents.get(startChild+i); - listing[i] = createFileStatus(cur.getLocalNameBytes(), cur, - needLocation, snapshot, isRawPath); + byte curPolicy = isSuperUser && !cur.isSymlink()? 
+ cur.getLocalStoragePolicyID(): BlockStoragePolicy.ID_UNSPECIFIED; + listing[i] = createFileStatus(cur.getLocalNameBytes(), cur, needLocation, + getStoragePolicyID(curPolicy, parentStoragePolicy), snapshot, + isRawPath, inodesInPath); listingCnt++; if (needLocation) { // Once we hit lsLimit locations, stop. @@ -1423,7 +1484,8 @@ private DirectoryListing getSnapshotsListing(String src, byte[] startAfter) for (int i = 0; i < numOfListing; i++) { Root sRoot = snapshots.get(i + skipSize).getRoot(); listing[i] = createFileStatus(sRoot.getLocalNameBytes(), sRoot, - Snapshot.CURRENT_STATE_ID, false); + BlockStoragePolicy.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID, + false, null); } return new DirectoryListing( listing, snapshots.size() - skipSize - numOfListing); @@ -1433,10 +1495,12 @@ private DirectoryListing getSnapshotsListing(String src, byte[] startAfter) * @param src The string representation of the path to the file * @param resolveLink whether to throw UnresolvedLinkException * @param isRawPath true if a /.reserved/raw pathname was passed by the user + * @param includeStoragePolicy whether to include storage policy * @return object containing information regarding the file * or null if file not found */ - HdfsFileStatus getFileInfo(String src, boolean resolveLink, boolean isRawPath) + HdfsFileStatus getFileInfo(String src, boolean resolveLink, + boolean isRawPath, boolean includeStoragePolicy) throws IOException { String srcs = normalizePath(src); readLock(); @@ -1444,11 +1508,14 @@ HdfsFileStatus getFileInfo(String src, boolean resolveLink, boolean isRawPath) if (srcs.endsWith(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR)) { return getFileInfo4DotSnapshot(srcs); } - final INodesInPath inodesInPath = getLastINodeInPath(srcs, resolveLink); - final INode i = inodesInPath.getINode(0); - - return i == null? 
null: createFileStatus(HdfsFileStatus.EMPTY_NAME, i, - inodesInPath.getPathSnapshotId(), isRawPath); + final INodesInPath inodesInPath = getINodesInPath(srcs, resolveLink); + final INode[] inodes = inodesInPath.getINodes(); + final INode i = inodes[inodes.length - 1]; + byte policyId = includeStoragePolicy && i != null && !i.isSymlink() ? + i.getStoragePolicyID() : BlockStoragePolicy.ID_UNSPECIFIED; + return i == null ? null : createFileStatus(HdfsFileStatus.EMPTY_NAME, i, + policyId, inodesInPath.getPathSnapshotId(), isRawPath, + inodesInPath); } finally { readUnlock(); } @@ -1465,7 +1532,7 @@ private HdfsFileStatus getFileInfo4DotSnapshot(String src) throws UnresolvedLinkException { if (getINode4DotSnapshot(src) != null) { return new HdfsFileStatus(0, true, 0, 0, 0, 0, null, null, null, null, - HdfsFileStatus.EMPTY_NAME, -1L, 0, null); + HdfsFileStatus.EMPTY_NAME, -1L, 0, null, BlockStoragePolicy.ID_UNSPECIFIED); } return null; } @@ -2100,8 +2167,17 @@ public final void addToInodeMap(INode inode) { for (XAttr xattr : xattrs) { final String xaName = XAttrHelper.getPrefixName(xattr); if (CRYPTO_XATTR_ENCRYPTION_ZONE.equals(xaName)) { - ezManager.unprotectedAddEncryptionZone(inode.getId(), - new String(xattr.getValue())); + try { + final HdfsProtos.ZoneEncryptionInfoProto ezProto = + HdfsProtos.ZoneEncryptionInfoProto.parseFrom( + xattr.getValue()); + ezManager.unprotectedAddEncryptionZone(inode.getId(), + PBHelper.convert(ezProto.getSuite()), + ezProto.getKeyName()); + } catch (InvalidProtocolBufferException e) { + NameNode.LOG.warn("Error parsing protocol buffer of " + + "EZ XAttr " + xattr.getName()); + } } } } @@ -2293,34 +2369,45 @@ void reset() { * @throws IOException if any error occurs */ private HdfsFileStatus createFileStatus(byte[] path, INode node, - boolean needLocation, int snapshot, boolean isRawPath) + boolean needLocation, byte storagePolicy, int snapshot, + boolean isRawPath, INodesInPath iip) throws IOException { if (needLocation) { - return 
createLocatedFileStatus(path, node, snapshot, isRawPath); + return createLocatedFileStatus(path, node, storagePolicy, snapshot, + isRawPath, iip); } else { - return createFileStatus(path, node, snapshot, isRawPath); + return createFileStatus(path, node, storagePolicy, snapshot, + isRawPath, iip); } } + /** * Create FileStatus by file INode */ - HdfsFileStatus createFileStatus(byte[] path, INode node, - int snapshot, boolean isRawPath) throws IOException { + HdfsFileStatus createFileStatus(byte[] path, INode node, byte storagePolicy, + int snapshot, boolean isRawPath, INodesInPath iip) throws IOException { long size = 0; // length is zero for directories short replication = 0; long blocksize = 0; + final boolean isEncrypted; + + final FileEncryptionInfo feInfo = isRawPath ? null : + getFileEncryptionInfo(node, snapshot, iip); + if (node.isFile()) { final INodeFile fileNode = node.asFile(); size = fileNode.computeFileSize(snapshot); replication = fileNode.getFileReplication(snapshot); blocksize = fileNode.getPreferredBlockSize(); + isEncrypted = (feInfo != null) || + (isRawPath && isInAnEZ(INodesInPath.fromINode(node))); + } else { + isEncrypted = isInAnEZ(INodesInPath.fromINode(node)); } + int childrenNum = node.isDirectory() ? node.asDirectory().getChildrenNum(snapshot) : 0; - FileEncryptionInfo feInfo = isRawPath ? null : - getFileEncryptionInfo(node, snapshot); - return new HdfsFileStatus( size, node.isDirectory(), @@ -2328,28 +2415,31 @@ HdfsFileStatus createFileStatus(byte[] path, INode node, blocksize, node.getModificationTime(snapshot), node.getAccessTime(snapshot), - getPermissionForFileStatus(node, snapshot), + getPermissionForFileStatus(node, snapshot, isEncrypted), node.getUserName(snapshot), node.getGroupName(snapshot), node.isSymlink() ? 
node.asSymlink().getSymlink() : null, path, node.getId(), childrenNum, - feInfo); + feInfo, + storagePolicy); } /** * Create FileStatus with location info by file INode */ - private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path, - INode node, int snapshot, boolean isRawPath) throws IOException { + private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path, INode node, + byte storagePolicy, int snapshot, boolean isRawPath, + INodesInPath iip) throws IOException { assert hasReadLock(); long size = 0; // length is zero for directories short replication = 0; long blocksize = 0; LocatedBlocks loc = null; + final boolean isEncrypted; final FileEncryptionInfo feInfo = isRawPath ? null : - getFileEncryptionInfo(node, snapshot); + getFileEncryptionInfo(node, snapshot, iip); if (node.isFile()) { final INodeFile fileNode = node.asFile(); size = fileNode.computeFileSize(snapshot); @@ -2367,6 +2457,10 @@ private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path, if (loc == null) { loc = new LocatedBlocks(); } + isEncrypted = (feInfo != null) || + (isRawPath && isInAnEZ(INodesInPath.fromINode(node))); + } else { + isEncrypted = isInAnEZ(INodesInPath.fromINode(node)); } int childrenNum = node.isDirectory() ? node.asDirectory().getChildrenNum(snapshot) : 0; @@ -2375,10 +2469,10 @@ private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path, new HdfsLocatedFileStatus(size, node.isDirectory(), replication, blocksize, node.getModificationTime(snapshot), node.getAccessTime(snapshot), - getPermissionForFileStatus(node, snapshot), + getPermissionForFileStatus(node, snapshot, isEncrypted), node.getUserName(snapshot), node.getGroupName(snapshot), node.isSymlink() ? node.asSymlink().getSymlink() : null, path, - node.getId(), loc, childrenNum, feInfo); + node.getId(), loc, childrenNum, feInfo, storagePolicy); // Set caching information for the located blocks. 
if (loc != null) { CacheManager cacheManager = namesystem.getCacheManager(); @@ -2391,17 +2485,21 @@ private HdfsLocatedFileStatus createLocatedFileStatus(byte[] path, /** * Returns an inode's FsPermission for use in an outbound FileStatus. If the - * inode has an ACL, then this method will convert to a FsAclPermission. + * inode has an ACL or is for an encrypted file/dir, then this method will + * return an FsPermissionExtension. * * @param node INode to check * @param snapshot int snapshot ID + * @param isEncrypted boolean true if the file/dir is encrypted * @return FsPermission from inode, with ACL bit on if the inode has an ACL + * and encrypted bit on if it represents an encrypted file/dir. */ private static FsPermission getPermissionForFileStatus(INode node, - int snapshot) { + int snapshot, boolean isEncrypted) { FsPermission perm = node.getFsPermission(snapshot); - if (node.getAclFeature(snapshot) != null) { - perm = new FsAclPermission(perm); + boolean hasAcl = node.getAclFeature(snapshot) != null; + if (hasAcl || isEncrypted) { + perm = new FsPermissionExtension(perm, hasAcl, isEncrypted); } return perm; } @@ -2611,7 +2709,8 @@ List unprotectedRemoveXAttrs(final String src, */ @VisibleForTesting List filterINodeXAttrs(final List existingXAttrs, - final List toFilter, final List filtered) { + final List toFilter, final List filtered) + throws AccessControlException { if (existingXAttrs == null || existingXAttrs.isEmpty() || toFilter == null || toFilter.isEmpty()) { return existingXAttrs; @@ -2627,6 +2726,10 @@ List filterINodeXAttrs(final List existingXAttrs, XAttr filter = it.next(); Preconditions.checkArgument(!KEYID_XATTR.equalsIgnoreValue(filter), "The encryption zone xattr should never be deleted."); + if (UNREADABLE_BY_SUPERUSER_XATTR.equalsIgnoreValue(filter)) { + throw new AccessControlException("The xattr '" + + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + "' can not be deleted."); + } if (a.equalsIgnoreValue(filter)) { add = false; it.remove(); @@ 
-2661,11 +2764,11 @@ String getKeyName(INodesInPath iip) { } } - XAttr createEncryptionZone(String src, String keyName) + XAttr createEncryptionZone(String src, CipherSuite suite, String keyName) throws IOException { writeLock(); try { - return ezManager.createEncryptionZone(src, keyName); + return ezManager.createEncryptionZone(src, suite, keyName); } finally { writeUnlock(); } @@ -2696,7 +2799,8 @@ BatchedListEntries listEncryptionZones(long prevId) void setFileEncryptionInfo(String src, FileEncryptionInfo info) throws IOException { // Make the PB for the xattr - final HdfsProtos.FileEncryptionInfoProto proto = PBHelper.convert(info); + final HdfsProtos.PerFileEncryptionInfoProto proto = + PBHelper.convertPerFileEncInfo(info); final byte[] protoBytes = proto.toByteArray(); final XAttr fileEncryptionAttr = XAttrHelper.buildXAttr(CRYPTO_XATTR_FILE_ENCRYPTION_INFO, protoBytes); @@ -2712,35 +2816,64 @@ void setFileEncryptionInfo(String src, FileEncryptionInfo info) } /** - * Return the FileEncryptionInfo for an INode, or null if the INode is not - * an encrypted file. - */ - FileEncryptionInfo getFileEncryptionInfo(INode inode, int snapshotId) - throws IOException { + * This function combines the per-file encryption info (obtained + * from the inode's XAttrs), and the encryption info from its zone, and + * returns a consolidated FileEncryptionInfo instance. Null is returned + * for non-encrypted files. 
+ * + * @param inode inode of the file + * @param snapshotId ID of the snapshot that + * we want to get encryption info from + * @param iip inodes in the path containing the file, passed in to + * avoid obtaining the list of inodes again; if iip is + * null then the list of inodes will be obtained again + * @return consolidated file encryption info; null for non-encrypted files + */ + FileEncryptionInfo getFileEncryptionInfo(INode inode, int snapshotId, + INodesInPath iip) throws IOException { if (!inode.isFile()) { return null; } readLock(); try { - List xAttrs = XAttrStorage.readINodeXAttrs(inode, snapshotId); - if (xAttrs == null) { - return null; + if (iip == null) { + iip = getINodesInPath(inode.getFullPathName(), true); } - for (XAttr x : xAttrs) { - if (XAttrHelper.getPrefixName(x) - .equals(CRYPTO_XATTR_FILE_ENCRYPTION_INFO)) { - try { - HdfsProtos.FileEncryptionInfoProto proto = - HdfsProtos.FileEncryptionInfoProto.parseFrom(x.getValue()); - FileEncryptionInfo feInfo = PBHelper.convert(proto); - return feInfo; - } catch (InvalidProtocolBufferException e) { - throw new IOException("Could not parse file encryption info for " + - "inode " + inode, e); - } + EncryptionZone encryptionZone = getEZForPath(iip); + if (encryptionZone == null || + encryptionZone.equals(EncryptionZoneManager.NULL_EZ)) { + // not an encrypted file + return null; + } else if(encryptionZone.getPath() == null + || encryptionZone.getPath().isEmpty()) { + if (NameNode.LOG.isDebugEnabled()) { + NameNode.LOG.debug("Encryption zone " + + encryptionZone.getPath() + " does not have a valid path."); } } - return null; + + CipherSuite suite = encryptionZone.getSuite(); + String keyName = encryptionZone.getKeyName(); + + XAttr fileXAttr = unprotectedGetXAttrByName(inode, snapshotId, + CRYPTO_XATTR_FILE_ENCRYPTION_INFO); + + if (fileXAttr == null) { + NameNode.LOG.warn("Could not find encryption XAttr for file " + + inode.getFullPathName() + " in encryption zone " + + encryptionZone.getPath()); + 
return null; + } + + try { + HdfsProtos.PerFileEncryptionInfoProto fileProto = + HdfsProtos.PerFileEncryptionInfoProto.parseFrom( + fileXAttr.getValue()); + return PBHelper.convert(fileProto, suite, keyName); + } catch (InvalidProtocolBufferException e) { + throw new IOException("Could not parse file encryption info for " + + "inode " + inode, e); + } } finally { readUnlock(); } @@ -2765,15 +2898,26 @@ INode unprotectedSetXAttrs(final String src, final List xAttrs, int snapshotId = iip.getLatestSnapshotId(); List existingXAttrs = XAttrStorage.readINodeXAttrs(inode); List newXAttrs = setINodeXAttrs(existingXAttrs, xAttrs, flag); + final boolean isFile = inode.isFile(); - /* - * If we're adding the encryption zone xattr, then add src to the list - * of encryption zones. - */ for (XAttr xattr : newXAttrs) { final String xaName = XAttrHelper.getPrefixName(xattr); + + /* + * If we're adding the encryption zone xattr, then add src to the list + * of encryption zones. + */ if (CRYPTO_XATTR_ENCRYPTION_ZONE.equals(xaName)) { - ezManager.addEncryptionZone(inode.getId(), new String(xattr.getValue())); + final HdfsProtos.ZoneEncryptionInfoProto ezProto = + HdfsProtos.ZoneEncryptionInfoProto.parseFrom(xattr.getValue()); + ezManager.addEncryptionZone(inode.getId(), + PBHelper.convert(ezProto.getSuite()), + ezProto.getKeyName()); + } + + if (!isFile && SECURITY_XATTR_UNREADABLE_BY_SUPERUSER.equals(xaName)) { + throw new IOException("Can only set '" + + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + "' on a file."); } } @@ -2865,12 +3009,42 @@ List getXAttrs(String src) throws IOException { INodesInPath iip = getLastINodeInPath(srcs, true); INode inode = resolveLastINode(src, iip); int snapshotId = iip.getPathSnapshotId(); - return XAttrStorage.readINodeXAttrs(inode, snapshotId); + return unprotectedGetXAttrs(inode, snapshotId); + } finally { + readUnlock(); + } + } + + List getXAttrs(INode inode, int snapshotId) throws IOException { + readLock(); + try { + return 
unprotectedGetXAttrs(inode, snapshotId); } finally { readUnlock(); } } + private List unprotectedGetXAttrs(INode inode, int snapshotId) + throws IOException { + return XAttrStorage.readINodeXAttrs(inode, snapshotId); + } + + private XAttr unprotectedGetXAttrByName(INode inode, int snapshotId, + String xAttrName) + throws IOException { + List xAttrs = XAttrStorage.readINodeXAttrs(inode, snapshotId); + if (xAttrs == null) { + return null; + } + for (XAttr x : xAttrs) { + if (XAttrHelper.getPrefixName(x) + .equals(xAttrName)) { + return x; + } + } + return null; + } + private static INode resolveLastINode(String src, INodesInPath iip) throws FileNotFoundException { INode inode = iip.getLastINode(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index a157f4f9ab03f..d0f2d0a129930 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; @@ -829,7 +830,16 @@ void logSetReplication(String src, short replication) { .setReplication(replication); logEdit(op); } - + + /** + * Add set storage policy id record to edit log + */ + void logSetStoragePolicy(String src, byte policyId) { + SetStoragePolicyOp op = 
SetStoragePolicyOp.getInstance(cache.get()) + .setPath(src).setPolicyId(policyId); + logEdit(op); + } + /** Add set namespace quota record to edit log * * @param src the string representation of the path to a directory diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 6afd81ccb2a43..cc0572ef69129 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -33,6 +33,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -78,6 +79,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetPermissionsOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetStoragePolicyOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetXAttrOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.RemoveXAttrOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; @@ -339,8 +341,10 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, // 3. 
OP_ADD to open file for append // See if the file already exists (persistBlocks call) - final INodesInPath iip = fsDir.getLastINodeInPath(path); - INodeFile oldFile = INodeFile.valueOf(iip.getINode(0), path, true); + final INodesInPath iip = fsDir.getINodesInPath(path, true); + final INode[] inodes = iip.getINodes(); + INodeFile oldFile = INodeFile.valueOf( + inodes[inodes.length - 1], path, true); if (oldFile != null && addCloseOp.overwrite) { // This is OP_ADD with overwrite fsDir.unprotectedDelete(path, addCloseOp.mtime); @@ -368,8 +372,9 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, // add the op into retry cache if necessary if (toAddRetryCache) { HdfsFileStatus stat = fsNamesys.dir.createFileStatus( - HdfsFileStatus.EMPTY_NAME, newFile, Snapshot.CURRENT_STATE_ID, - false); + HdfsFileStatus.EMPTY_NAME, newFile, + BlockStoragePolicy.ID_UNSPECIFIED, Snapshot.CURRENT_STATE_ID, + false, iip); fsNamesys.addCacheEntryWithPayload(addCloseOp.rpcClientId, addCloseOp.rpcCallId, stat); } @@ -832,6 +837,13 @@ private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, } break; } + case OP_SET_STORAGE_POLICY: { + SetStoragePolicyOp setStoragePolicyOp = (SetStoragePolicyOp) op; + fsDir.unprotectedSetStoragePolicy( + renameReservedPathsOnUpgrade(setStoragePolicyOp.path, logVersion), + setStoragePolicyOp.policyId); + break; + } default: throw new IOException("Invalid operation read " + op.opCode); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index c5dd3df367b97..112c3c1067c0a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -61,6 +61,7 @@ import static 
org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_TIMES; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_UPDATE_BLOCKS; import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_UPDATE_MASTER_KEY; +import static org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes.OP_SET_STORAGE_POLICY; import java.io.DataInput; import java.io.DataInputStream; @@ -193,6 +194,7 @@ public OpInstanceCache() { OP_ROLLING_UPGRADE_FINALIZE, "finalize")); inst.put(OP_SET_XATTR, new SetXAttrOp()); inst.put(OP_REMOVE_XATTR, new RemoveXAttrOp()); + inst.put(OP_SET_STORAGE_POLICY, new SetStoragePolicyOp()); } public FSEditLogOp get(FSEditLogOpCodes opcode) { @@ -3798,6 +3800,71 @@ static class RollbackException extends IOException { } } + /** {@literal @Idempotent} for {@link ClientProtocol#setStoragePolicy} */ + static class SetStoragePolicyOp extends FSEditLogOp { + String path; + byte policyId; + + private SetStoragePolicyOp() { + super(OP_SET_STORAGE_POLICY); + } + + static SetStoragePolicyOp getInstance(OpInstanceCache cache) { + return (SetStoragePolicyOp) cache.get(OP_SET_STORAGE_POLICY); + } + + SetStoragePolicyOp setPath(String path) { + this.path = path; + return this; + } + + SetStoragePolicyOp setPolicyId(byte policyId) { + this.policyId = policyId; + return this; + } + + @Override + public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeString(path, out); + out.writeByte(policyId); + } + + @Override + void readFields(DataInputStream in, int logVersion) + throws IOException { + this.path = FSImageSerialization.readString(in); + this.policyId = in.readByte(); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("SetStoragePolicyOp [path="); + builder.append(path); + builder.append(", policyId="); + builder.append(policyId); + builder.append(", opCode="); + builder.append(opCode); + builder.append(", txid="); + builder.append(txid); + 
builder.append("]"); + return builder.toString(); + } + + @Override + protected void toXml(ContentHandler contentHandler) throws SAXException { + XMLUtils.addSaxString(contentHandler, "PATH", path); + XMLUtils.addSaxString(contentHandler, "POLICYID", + Byte.valueOf(policyId).toString()); + } + + @Override + void fromXml(Stanza st) throws InvalidXmlException { + this.path = st.getValue("PATH"); + this.policyId = Byte.valueOf(st.getValue("POLICYID")); + } + } + /** * Class for writing editlog ops */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index bf4bbb4a60bd7..86be54adb7f4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -72,6 +72,7 @@ public enum FSEditLogOpCodes { OP_ROLLING_UPGRADE_FINALIZE ((byte) 42), OP_SET_XATTR ((byte) 43), OP_REMOVE_XATTR ((byte) 44), + OP_SET_STORAGE_POLICY ((byte) 45), // Note that the current range of the valid OP code is 0~127 OP_INVALID ((byte) -1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 5b6d269546b96..af3cf2c06fd21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -784,7 +784,7 @@ INode loadINode(final byte[] localName, boolean isSnapshotINode, counter.increment(); } final INodeFile file = new INodeFile(inodeId, localName, permissions, - modificationTime, atime, blocks, replication, blockSize); + 
modificationTime, atime, blocks, replication, blockSize, (byte)0); if (underConstruction) { file.toUnderConstruction(clientName, clientMachine); } @@ -885,7 +885,7 @@ public INodeFileAttributes loadINodeFileAttributes(DataInput in) final long preferredBlockSize = in.readLong(); return new INodeFileAttributes.SnapshotCopy(name, permissions, null, modificationTime, - accessTime, replication, preferredBlockSize, null); + accessTime, replication, preferredBlockSize, (byte)0, null); } public INodeDirectoryAttributes loadINodeDirectoryAttributes(DataInput in) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 538e98ae3da3d..321a14855edad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -290,7 +290,8 @@ private INodeFile loadINodeFile(INodeSection.INode n) { final INodeFile file = new INodeFile(n.getId(), n.getName().toByteArray(), permissions, f.getModificationTime(), - f.getAccessTime(), blocks, replication, f.getPreferredBlockSize()); + f.getAccessTime(), blocks, replication, f.getPreferredBlockSize(), + (byte)f.getStoragePolicyID()); if (f.hasAcl()) { file.addAclFeature(new AclFeature(loadAclEntries(f.getAcl(), @@ -401,7 +402,8 @@ public static INodeSection.INodeFile.Builder buildINodeFile( .setModificationTime(file.getModificationTime()) .setPermission(buildPermissionStatus(file, state.getStringMap())) .setPreferredBlockSize(file.getPreferredBlockSize()) - .setReplication(file.getFileReplication()); + .setReplication(file.getFileReplication()) + .setStoragePolicyID(file.getLocalStoragePolicyID()); AclFeature f = file.getAclFeature(); if (f != null) { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index 3956d9a5894c0..be70f4bd6cedd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -149,7 +149,7 @@ static INodeFile readINodeUnderConstruction( assert numLocs == 0 : "Unexpected block locations"; INodeFile file = new INodeFile(inodeId, name, perm, modificationTime, - modificationTime, blocks, blockReplication, preferredBlockSize); + modificationTime, blocks, blockReplication, preferredBlockSize, (byte)0); file.toUnderConstruction(clientName, clientMachine); return file; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index de2fdea05548d..876cf49659a25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.crypto.key.KeyProvider.KeyVersion; import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension .EncryptedKeyVersion; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; @@ -65,8 +64,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY; -import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY; @@ -88,6 +85,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_KEY; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; import static org.apache.hadoop.util.Time.now; import java.io.BufferedWriter; @@ -135,6 +133,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CipherSuite; +import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import org.apache.hadoop.fs.CacheFlag; @@ -161,11 +160,12 @@ import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.UnknownCipherSuiteException; import org.apache.hadoop.hdfs.protocol.AclException; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; @@ -334,7 
+334,7 @@ public boolean isAuditEnabled() { private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink) throws IOException { return (isAuditEnabled() && isExternalInvocation()) - ? dir.getFileInfo(path, resolveSymlink, false) : null; + ? dir.getFileInfo(path, resolveSymlink, false, false) : null; } private void logAuditEvent(boolean succeeded, String cmd, String src) @@ -544,8 +544,6 @@ private void logAuditEvent(boolean succeeded, private final FSImage fsImage; - private boolean randomizeBlockLocationsPerBlock; - /** * Notify that loading of this FSDirectory is complete, and * it is imageLoaded for use @@ -862,10 +860,6 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT); - this.randomizeBlockLocationsPerBlock = conf.getBoolean( - DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK, - DFS_NAMENODE_RANDOMIZE_BLOCK_LOCATIONS_PER_BLOCK_DEFAULT); - this.dtSecretManager = createDelegationTokenSecretManager(conf); this.dir = new FSDirectory(this, conf); this.snapshotManager = new SnapshotManager(dir); @@ -1161,8 +1155,9 @@ void startActiveServices() throws IOException { cacheManager.startMonitorThread(); blockManager.getDatanodeManager().setShouldSendCachingCommands(true); } finally { - writeUnlock(); startingActiveService = false; + checkSafeMode(); + writeUnlock(); } } @@ -1736,7 +1731,7 @@ LocatedBlocks getBlockLocations(String clientMachine, String src, true); if (blocks != null) { blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine, - blocks.getLocatedBlocks(), randomizeBlockLocationsPerBlock); + blocks.getLocatedBlocks()); // lastBlock is not part of getLocatedBlocks(), might need to sort it too LocatedBlock lastBlock = blocks.getLastLocatedBlock(); @@ -1745,7 +1740,7 @@ LocatedBlocks getBlockLocations(String clientMachine, String src, Lists.newArrayListWithCapacity(1); lastBlockList.add(lastBlock); 
blockManager.getDatanodeManager().sortLocatedBlocks(clientMachine, - lastBlockList, randomizeBlockLocationsPerBlock); + lastBlockList); } } return blocks; @@ -1837,8 +1832,13 @@ private LocatedBlocks getBlockLocationsUpdateTimes(final String srcArg, doAccessTime = false; } - final INodesInPath iip = dir.getLastINodeInPath(src); - final INodeFile inode = INodeFile.valueOf(iip.getLastINode(), src); + final INodesInPath iip = dir.getINodesInPath(src, true); + final INode[] inodes = iip.getINodes(); + final INodeFile inode = INodeFile.valueOf( + inodes[inodes.length - 1], src); + if (isPermissionEnabled) { + checkUnreadableBySuperuser(pc, inode, iip.getPathSnapshotId()); + } if (!iip.isSnapshot() //snapshots are readonly, so don't update atime. && doAccessTime && isAccessTimeSupported()) { final long now = now(); @@ -1868,7 +1868,8 @@ && doAccessTime && isAccessTimeSupported()) { final FileEncryptionInfo feInfo = FSDirectory.isReservedRawName(srcArg) ? - null : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId()); + null : dir.getFileEncryptionInfo(inode, iip.getPathSnapshotId(), + iip); final LocatedBlocks blocks = blockManager.createLocatedBlocks(inode.getBlocks(), fileSize, @@ -2255,6 +2256,52 @@ private boolean setReplicationInt(final String srcArg, return isFile; } + /** + * Set the storage policy for a file or a directory. 
+ * + * @param src file/directory path + * @param policyName storage policy name + */ + void setStoragePolicy(String src, final String policyName) + throws IOException { + try { + setStoragePolicyInt(src, policyName); + } catch (AccessControlException e) { + logAuditEvent(false, "setStoragePolicy", src); + throw e; + } + } + + private void setStoragePolicyInt(String src, final String policyName) + throws IOException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); + byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); + waitForLoadingFSImage(); + HdfsFileStatus fileStat; + writeLock(); + try { + checkOperation(OperationCategory.WRITE); + checkNameNodeSafeMode("Cannot set storage policy for " + src); + src = FSDirectory.resolvePath(src, pathComponents, dir); + + // get the corresponding policy and make sure the policy name is valid + BlockStoragePolicy policy = blockManager.getStoragePolicy(policyName); + if (policy == null) { + throw new HadoopIllegalArgumentException( + "Cannot find a block policy with the name " + policyName); + } + dir.setStoragePolicy(src, policy.getId()); + getEditLog().logSetStoragePolicy(src, policy.getId()); + fileStat = getAuditFileInfo(src, false); + } finally { + writeUnlock(); + } + + getEditLog().logSync(); + logAuditEvent(true, "setStoragePolicy", src, null, fileStat); + } + long getPreferredBlockSize(String filename) throws IOException, UnresolvedLinkException { FSPermissionChecker pc = getPermissionChecker(); @@ -2438,84 +2485,66 @@ private HdfsFileStatus startFileInt(final String srcArg, waitForLoadingFSImage(); - /* - * We want to avoid holding any locks while doing KeyProvider operations, - * since they can be very slow. Since the path can - * flip flop between being in an encryption zone and not in the meantime, - * we need to recheck the preconditions and redo KeyProvider operations - * in some situations. 
- * - * A special RetryStartFileException is used to indicate that we should - * retry creation of a FileEncryptionInfo. + /** + * If the file is in an encryption zone, we optimistically create an + * EDEK for the file by calling out to the configured KeyProvider. + * Since this typically involves doing an RPC, we take the readLock + * initially, then drop it to do the RPC. + * + * Since the path can flip-flop between being in an encryption zone and not + * in the meantime, we need to recheck the preconditions when we retake the + * lock to do the create. If the preconditions are not met, we throw a + * special RetryStartFileException to ask the DFSClient to try the create + * again later. */ - BlocksMapUpdateInfo toRemoveBlocks = null; + CipherSuite suite = null; + String ezKeyName = null; + readLock(); try { - boolean shouldContinue = true; - int iters = 0; - while (shouldContinue) { - skipSync = false; - if (iters >= 10) { - throw new IOException("Too many retries because of encryption zone " + - "operations, something might be broken!"); - } - shouldContinue = false; - iters++; - - // Optimistically determine CipherSuite and ezKeyName if the path is - // currently within an encryption zone - CipherSuite suite = null; - String ezKeyName = null; - readLock(); - try { - src = resolvePath(src, pathComponents); - INodesInPath iip = dir.getINodesInPath4Write(src); - // Nothing to do if the path is not within an EZ - if (dir.isInAnEZ(iip)) { - suite = chooseCipherSuite(iip, cipherSuites); - if (suite != null) { - Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN), - "Chose an UNKNOWN CipherSuite!"); - } - ezKeyName = dir.getKeyName(iip); - Preconditions.checkState(ezKeyName != null); - } - } finally { - readUnlock(); + src = resolvePath(src, pathComponents); + INodesInPath iip = dir.getINodesInPath4Write(src); + // Nothing to do if the path is not within an EZ + if (dir.isInAnEZ(iip)) { + suite = chooseCipherSuite(iip, cipherSuites); + if (suite != null) { 
+ Preconditions.checkArgument(!suite.equals(CipherSuite.UNKNOWN), + "Chose an UNKNOWN CipherSuite!"); } + ezKeyName = dir.getKeyName(iip); + Preconditions.checkState(ezKeyName != null); + } + } finally { + readUnlock(); + } - Preconditions.checkState( - (suite == null && ezKeyName == null) || + Preconditions.checkState( + (suite == null && ezKeyName == null) || (suite != null && ezKeyName != null), - "Both suite and ezKeyName should both be null or not null"); - // Generate EDEK if necessary while not holding the lock - EncryptedKeyVersion edek = - generateEncryptedDataEncryptionKey(ezKeyName); - EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); - // Try to create the file with the computed cipher suite and EDEK - writeLock(); - try { - checkOperation(OperationCategory.WRITE); - checkNameNodeSafeMode("Cannot create file" + src); - src = resolvePath(src, pathComponents); - toRemoveBlocks = startFileInternal(pc, src, permissions, holder, - clientMachine, create, overwrite, createParent, replication, - blockSize, suite, edek, logRetryCache); - stat = dir.getFileInfo(src, false, - FSDirectory.isReservedRawName(srcArg)); - } catch (StandbyException se) { - skipSync = true; - throw se; - } catch (RetryStartFileException e) { - shouldContinue = true; - if (LOG.isTraceEnabled()) { - LOG.trace("Preconditions failed, retrying creation of " + - "FileEncryptionInfo", e); - } - } finally { - writeUnlock(); - } - } + "Both suite and ezKeyName should both be null or not null"); + + // Generate EDEK if necessary while not holding the lock + EncryptedKeyVersion edek = + generateEncryptedDataEncryptionKey(ezKeyName); + EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); + + // Proceed with the create, using the computed cipher suite and + // generated EDEK + BlocksMapUpdateInfo toRemoveBlocks = null; + writeLock(); + try { + checkOperation(OperationCategory.WRITE); + checkNameNodeSafeMode("Cannot create file" + src); + src = resolvePath(src, 
pathComponents); + toRemoveBlocks = startFileInternal(pc, src, permissions, holder, + clientMachine, create, overwrite, createParent, replication, + blockSize, suite, edek, logRetryCache); + stat = dir.getFileInfo(src, false, + FSDirectory.isReservedRawName(srcArg), false); + } catch (StandbyException se) { + skipSync = true; + throw se; } finally { + writeUnlock(); // There might be transactions logged while trying to recover the lease. // They need to be sync'ed even when an exception was thrown. if (!skipSync) { @@ -2572,7 +2601,7 @@ private BlocksMapUpdateInfo startFileInternal(FSPermissionChecker pc, feInfo = new FileEncryptionInfo(suite, edek.getEncryptedKeyVersion().getMaterial(), edek.getEncryptedKeyIv(), - edek.getEncryptionKeyVersionName()); + ezKeyName, edek.getEncryptionKeyVersionName()); Preconditions.checkNotNull(feInfo); } @@ -2976,8 +3005,9 @@ LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, throws LeaseExpiredException, NotReplicatedYetException, QuotaExceededException, SafeModeException, UnresolvedLinkException, IOException { - long blockSize; - int replication; + final long blockSize; + final int replication; + final byte storagePolicyID; DatanodeDescriptor clientNode = null; if(NameNode.stateChangeLog.isDebugEnabled()) { @@ -3012,13 +3042,15 @@ LocatedBlock getAdditionalBlock(String src, long fileId, String clientName, clientNode = blockManager.getDatanodeManager().getDatanodeByHost( pendingFile.getFileUnderConstructionFeature().getClientMachine()); replication = pendingFile.getFileReplication(); + storagePolicyID = pendingFile.getStoragePolicyID(); } finally { readUnlock(); } // choose targets for the new block to be allocated. 
- final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget( - src, replication, clientNode, excludedNodes, blockSize, favoredNodes); + final DatanodeStorageInfo targets[] = getBlockManager().chooseTarget4NewBlock( + src, replication, clientNode, excludedNodes, blockSize, favoredNodes, + storagePolicyID); // Part II. // Allocate a new block, add it to the INode and the BlocksMap. @@ -3206,6 +3238,7 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, final DatanodeDescriptor clientnode; final long preferredblocksize; + final byte storagePolicyID; final List chosen; checkOperation(OperationCategory.READ); byte[][] pathComponents = FSDirectory.getPathComponentsForReservedPath(src); @@ -3232,6 +3265,7 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, .getClientMachine(); clientnode = blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); preferredblocksize = file.getPreferredBlockSize(); + storagePolicyID = file.getStoragePolicyID(); //find datanode storages final DatanodeManager dm = blockManager.getDatanodeManager(); @@ -3241,10 +3275,9 @@ LocatedBlock getAdditionalDatanode(String src, long fileId, } // choose new datanodes. 
- final DatanodeStorageInfo[] targets = blockManager.getBlockPlacementPolicy( - ).chooseTarget(src, numAdditionalNodes, clientnode, chosen, true, - // TODO: get storage type from the file - excludes, preferredblocksize, StorageType.DEFAULT); + final DatanodeStorageInfo[] targets = blockManager.chooseTarget4AdditionalDatanode( + src, numAdditionalNodes, clientnode, chosen, + excludes, preferredblocksize, storagePolicyID); final LocatedBlock lb = new LocatedBlock(blk, targets); blockManager.setBlockToken(lb, AccessMode.COPY); return lb; @@ -3931,12 +3964,14 @@ HdfsFileStatus getFileInfo(final String srcArg, boolean resolveLink) try { checkOperation(OperationCategory.READ); src = resolvePath(src, pathComponents); + boolean isSuperUser = true; if (isPermissionEnabled) { checkPermission(pc, src, false, null, null, null, null, false, resolveLink); + isSuperUser = pc.isSuperUser(); } stat = dir.getFileInfo(src, resolveLink, - FSDirectory.isReservedRawName(srcArg)); + FSDirectory.isReservedRawName(srcArg), isSuperUser); } catch (AccessControlException e) { logAuditEvent(false, "getfileinfo", srcArg); throw e; @@ -4165,7 +4200,7 @@ private boolean mkdirsRecursively(String src, PermissionStatus permissions, /** * Get the content summary for a specific file/dir. * - * @param src The string representation of the path to the file + * @param srcArg The string representation of the path to the file * * @throws AccessControlException if access is denied * @throws UnresolvedLinkException if a symlink is encountered. 
@@ -4741,16 +4776,18 @@ private DirectoryListing getListingInt(final String srcArg, byte[] startAfter, "Can't find startAfter " + startAfterString); } } - + + boolean isSuperUser = true; if (isPermissionEnabled) { if (dir.isDir(src)) { checkPathAccess(pc, src, FsAction.READ_EXECUTE); } else { checkTraverse(pc, src); } + isSuperUser = pc.isSuperUser(); } logAuditEvent(true, "listStatus", srcArg); - dl = dir.getListing(src, startAfter, needLocation); + dl = dir.getListing(src, startAfter, needLocation, isSuperUser); } finally { readUnlock(); } @@ -4900,12 +4937,6 @@ private void closeFile(String path, INodeFile file) { /** * Add the given symbolic link to the fs. Record it in the edits log. - * @param path - * @param target - * @param dirPerms - * @param createParent - * @param logRetryCache - * @param dir */ private INodeSymlink addSymlink(String path, String target, PermissionStatus dirPerms, @@ -5542,6 +5573,9 @@ private void checkMode() { // Have to have write-lock since leaving safemode initializes // repl queues, which requires write lock assert hasWriteLock(); + if (inTransitionToActive()) { + return; + } // if smmthread is already running, the block threshold must have been // reached before, there is no need to enter the safe mode again if (smmthread == null && needEnter()) { @@ -6147,6 +6181,21 @@ private void checkPathAccess(FSPermissionChecker pc, checkPermission(pc, path, false, null, null, access, null); } + private void checkUnreadableBySuperuser(FSPermissionChecker pc, + INode inode, int snapshotId) + throws IOException { + for (XAttr xattr : dir.getXAttrs(inode, snapshotId)) { + if (XAttrHelper.getPrefixName(xattr). 
+ equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { + if (pc.isSuperUser()) { + throw new AccessControlException("Access is denied for " + + pc.getUser() + " since the superuser is not allowed to " + + "perform this operation."); + } + } + } + } + private void checkParentAccess(FSPermissionChecker pc, String path, FsAction access) throws AccessControlException, UnresolvedLinkException { @@ -8572,8 +8621,8 @@ void createEncryptionZone(final String src, final String keyName) throw new IOException("Must specify a key name when creating an " + "encryption zone"); } - KeyVersion keyVersion = provider.getCurrentKey(keyName); - if (keyVersion == null) { + KeyProvider.Metadata metadata = provider.getMetadata(keyName); + if (metadata == null) { /* * It would be nice if we threw something more specific than * IOException when the key is not found, but the KeyProvider API @@ -8584,7 +8633,8 @@ void createEncryptionZone(final String src, final String keyName) */ throw new IOException("Key " + keyName + " doesn't exist."); } - createEncryptionZoneInt(src, keyName, cacheEntry != null); + createEncryptionZoneInt(src, metadata.getCipher(), + keyName, cacheEntry != null); success = true; } catch (AccessControlException e) { logAuditEvent(false, "createEncryptionZone", src); @@ -8594,8 +8644,8 @@ void createEncryptionZone(final String src, final String keyName) } } - private void createEncryptionZoneInt(final String srcArg, String keyName, - final boolean logRetryCache) throws IOException { + private void createEncryptionZoneInt(final String srcArg, String cipher, + String keyName, final boolean logRetryCache) throws IOException { String src = srcArg; HdfsFileStatus resultingStat = null; checkSuperuserPrivilege(); @@ -8609,7 +8659,8 @@ private void createEncryptionZoneInt(final String srcArg, String keyName, checkNameNodeSafeMode("Cannot create encryption zone on " + src); src = resolvePath(src, pathComponents); - final XAttr ezXAttr = dir.createEncryptionZone(src, keyName); + final 
CipherSuite suite = CipherSuite.convert(cipher); + final XAttr ezXAttr = dir.createEncryptionZone(src, suite, keyName); List xAttrs = Lists.newArrayListWithCapacity(1); xAttrs.add(ezXAttr); getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); @@ -8910,7 +8961,9 @@ private void checkXAttrChangeAccess(String src, XAttr xAttr, AccessControlException { if (isPermissionEnabled && xAttr.getNameSpace() == XAttr.NameSpace.USER) { final INode inode = dir.getINode(src); - if (inode.isDirectory() && inode.getFsPermission().getStickyBit()) { + if (inode != null && + inode.isDirectory() && + inode.getFsPermission().getStickyBit()) { if (!pc.isSuperUser()) { checkOwner(pc, src); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index c346be9c68174..307f507d508a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; @@ -684,6 +685,20 @@ public final INode setAccessTime(long accessTime, int latestSnapshotId) return this; } + /** + * @return the latest block storage policy id of the INode. Specifically, + * if a storage policy is directly specified on the INode then return the ID + * of that policy. Otherwise follow the latest parental path and return the + * ID of the first specified storage policy. 
+ */ + public abstract byte getStoragePolicyID(); + + /** + * @return the storage policy directly specified on the INode. Return + * {@link BlockStoragePolicy#ID_UNSPECIFIED} if no policy has + * been specified. + */ + public abstract byte getLocalStoragePolicyID(); /** * Breaks {@code path} into components. @@ -711,7 +726,7 @@ static byte[][] getPathComponents(String[] strings) { * @throws AssertionError if the given path is invalid. * @return array of path components. */ - static String[] getPathNames(String path) { + public static String[] getPathNames(String path) { if (path == null || !path.startsWith(Path.SEPARATOR)) { throw new AssertionError("Absolute path required"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 18b41098bd206..f5579ee89019d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -26,7 +26,9 @@ import java.util.Map; import org.apache.hadoop.fs.PathIsNotDirectoryException; +import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.protocol.SnapshotException; @@ -40,6 +42,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; /** * Directory INode class. @@ -103,6 +106,30 @@ public final INodeDirectory asDirectory() { return this; } + @Override + public byte getLocalStoragePolicyID() { + XAttrFeature f = getXAttrFeature(); + ImmutableList xattrs = f == null ? ImmutableList. 
of() : f + .getXAttrs(); + for (XAttr xattr : xattrs) { + if (BlockStoragePolicy.isStoragePolicyXAttr(xattr)) { + return (xattr.getValue())[0]; + } + } + return BlockStoragePolicy.ID_UNSPECIFIED; + } + + @Override + public byte getStoragePolicyID() { + byte id = getLocalStoragePolicyID(); + if (id != BlockStoragePolicy.ID_UNSPECIFIED) { + return id; + } + // if it is unspecified, check its parent + return getParent() != null ? getParent().getStoragePolicyID() : + BlockStoragePolicy.ID_UNSPECIFIED; + } + void setQuota(long nsQuota, long dsQuota) { DirectoryWithQuotaFeature quota = getDirectoryWithQuotaFeature(); if (quota != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java index b1e74859a9a04..f0f58a92668e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectoryAttributes.java @@ -18,10 +18,12 @@ package org.apache.hadoop.hdfs.server.namenode; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.permission.PermissionStatus; -import org.apache.hadoop.hdfs.server.namenode.XAttrFeature; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; /** * The attributes of an inode. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 94fa686709e49..7af2b713eac86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -28,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; @@ -71,10 +72,14 @@ public static INodeFile valueOf(INode inode, String path, boolean acceptNull) return inode.asFile(); } - /** Format: [16 bits for replication][48 bits for PreferredBlockSize] */ + /** + * Bit format: + * [4-bit storagePolicyID][12-bit replication][48-bit preferredBlockSize] + */ static enum HeaderFormat { PREFERRED_BLOCK_SIZE(null, 48, 1), - REPLICATION(PREFERRED_BLOCK_SIZE.BITS, 16, 1); + REPLICATION(PREFERRED_BLOCK_SIZE.BITS, 12, 1), + STORAGE_POLICY_ID(REPLICATION.BITS, BlockStoragePolicy.ID_BIT_LENGTH, 0); private final LongBitFormat BITS; @@ -90,10 +95,16 @@ static long getPreferredBlockSize(long header) { return PREFERRED_BLOCK_SIZE.BITS.retrieve(header); } - static long toLong(long preferredBlockSize, short replication) { + static byte getStoragePolicyID(long header) { + return (byte)STORAGE_POLICY_ID.BITS.retrieve(header); + } + + static long toLong(long preferredBlockSize, short replication, + byte storagePolicyID) { long h = 0; h = PREFERRED_BLOCK_SIZE.BITS.combine(preferredBlockSize, h); h = REPLICATION.BITS.combine(replication, h); + h = STORAGE_POLICY_ID.BITS.combine(storagePolicyID, h); return h; } } @@ -104,9 
+115,10 @@ static long toLong(long preferredBlockSize, short replication) { INodeFile(long id, byte[] name, PermissionStatus permissions, long mtime, long atime, BlockInfo[] blklist, short replication, - long preferredBlockSize) { + long preferredBlockSize, byte storagePolicyID) { super(id, name, permissions, mtime, atime); - header = HeaderFormat.toLong(preferredBlockSize, replication); + header = HeaderFormat.toLong(preferredBlockSize, replication, + storagePolicyID); this.blocks = blklist; } @@ -160,7 +172,6 @@ public boolean isUnderConstruction() { return getFileUnderConstructionFeature() != null; } - /** Convert this file to an {@link INodeFileUnderConstruction}. */ INodeFile toUnderConstruction(String clientName, String clientMachine) { Preconditions.checkState(!isUnderConstruction(), "file is already under construction"); @@ -355,6 +366,32 @@ public long getPreferredBlockSize() { return HeaderFormat.getPreferredBlockSize(header); } + @Override + public byte getLocalStoragePolicyID() { + return HeaderFormat.getStoragePolicyID(header); + } + + @Override + public byte getStoragePolicyID() { + byte id = getLocalStoragePolicyID(); + if (id == BlockStoragePolicy.ID_UNSPECIFIED) { + return this.getParent() != null ? 
+ this.getParent().getStoragePolicyID() : id; + } + return id; + } + + private void setStoragePolicyID(byte storagePolicyId) { + header = HeaderFormat.STORAGE_POLICY_ID.BITS.combine(storagePolicyId, + header); + } + + public final void setStoragePolicyID(byte storagePolicyId, + int latestSnapshotId) throws QuotaExceededException { + recordModification(latestSnapshotId); + setStoragePolicyID(storagePolicyId); + } + @Override public long getHeaderLong() { return header; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileAttributes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileAttributes.java index 47b76b74ab7f5..f9d2700e632f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileAttributes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileAttributes.java @@ -32,12 +32,14 @@ public interface INodeFileAttributes extends INodeAttributes { /** @return preferred block size in bytes */ public long getPreferredBlockSize(); - + /** @return the header as a long. 
*/ public long getHeaderLong(); public boolean metadataEquals(INodeFileAttributes other); + public byte getLocalStoragePolicyID(); + /** A copy of the inode file attributes */ public static class SnapshotCopy extends INodeAttributes.SnapshotCopy implements INodeFileAttributes { @@ -45,10 +47,11 @@ public static class SnapshotCopy extends INodeAttributes.SnapshotCopy public SnapshotCopy(byte[] name, PermissionStatus permissions, AclFeature aclFeature, long modificationTime, long accessTime, - short replication, long preferredBlockSize, XAttrFeature xAttrsFeature) { + short replication, long preferredBlockSize, byte storagePolicyID, + XAttrFeature xAttrsFeature) { super(name, permissions, aclFeature, modificationTime, accessTime, xAttrsFeature); - header = HeaderFormat.toLong(preferredBlockSize, replication); + header = HeaderFormat.toLong(preferredBlockSize, replication, storagePolicyID); } public SnapshotCopy(INodeFile file) { @@ -66,6 +69,11 @@ public long getPreferredBlockSize() { return HeaderFormat.getPreferredBlockSize(header); } + @Override + public byte getLocalStoragePolicyID() { + return HeaderFormat.getStoragePolicyID(header); + } + @Override public long getHeaderLong() { return header; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java index 02c0815c55ec2..87e47151fd864 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java @@ -22,6 +22,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; import org.apache.hadoop.hdfs.server.namenode.Quota.Counts; import 
org.apache.hadoop.util.GSet; @@ -121,6 +122,16 @@ public Counts cleanSubtree(int snapshotId, int priorSnapshotId, boolean countDiffChange) throws QuotaExceededException { return null; } + + @Override + public byte getStoragePolicyID(){ + return BlockStoragePolicy.ID_UNSPECIFIED; + } + + @Override + public byte getLocalStoragePolicyID() { + return BlockStoragePolicy.ID_UNSPECIFIED; + } }; return map.get(inode); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java index 9bd2ad0ebd514..a4766d1156142 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeReference.java @@ -285,6 +285,16 @@ public final void setAccessTime(long accessTime) { referred.setAccessTime(accessTime); } + @Override + public final byte getStoragePolicyID() { + return referred.getStoragePolicyID(); + } + + @Override + public final byte getLocalStoragePolicyID() { + return referred.getLocalStoragePolicyID(); + } + @Override final void recordModification(int latestSnapshotId) throws QuotaExceededException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java index 6729cd256ce03..45a4bc82a978e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeSymlink.java @@ -145,4 +145,16 @@ public void removeXAttrFeature() { public void addXAttrFeature(XAttrFeature f) { throw new UnsupportedOperationException("XAttrs are not supported on symlinks"); } + + @Override 
+ public byte getStoragePolicyID() { + throw new UnsupportedOperationException( + "Storage policy are not supported on symlinks"); + } + + @Override + public byte getLocalStoragePolicyID() { + throw new UnsupportedOperationException( + "Storage policy are not supported on symlinks"); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java index 34a32689942eb..ba052a495ecd7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodesInPath.java @@ -133,6 +133,7 @@ static INodesInPath resolve(final INodeDirectory startingDir, * be thrown when the path refers to a symbolic link. * @return the specified number of existing INodes in the path */ + // TODO: Eliminate null elements from inodes (to be provided by HDFS-7104) static INodesInPath resolve(final INodeDirectory startingDir, final byte[][] components, final int numOfINodes, final boolean resolveLink) throws UnresolvedLinkException { @@ -311,7 +312,7 @@ private void updateLatestSnapshotId(int sid) { } /** - * @return the whole inodes array including the null elements. + * @return the inodes array excluding the null elements. 
*/ INode[] getINodes() { if (capacity < inodes.length) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java index 404e2059d5f45..512913b3accb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeLayoutVersion.java @@ -68,7 +68,8 @@ public static enum Feature implements LayoutFeature { XATTRS(-57, "Extended attributes"), CREATE_OVERWRITE(-58, "Use single editlog record for " + "creating file with overwrite"), - XATTRS_NAMESPACE_EXT(-59, "Increase number of xattr namespaces"); + XATTRS_NAMESPACE_EXT(-59, "Increase number of xattr namespaces"), + BLOCK_STORAGE_POLICY(-60, "Block Storage policy"); private final FeatureInfo info; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 1a3b2e1749e02..2a05fcb4d0c28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -585,7 +585,13 @@ public boolean setReplication(String src, short replication) throws IOException { return namesystem.setReplication(src, replication); } - + + @Override + public void setStoragePolicy(String src, String policyName) + throws IOException { + namesystem.setStoragePolicy(src, policyName); + } + @Override // ClientProtocol public void setPermission(String src, FsPermission permissions) throws IOException { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 5cc8a4797e01f..a1871233cd49a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT; -import static org.apache.hadoop.fs.CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY; - import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; @@ -59,10 +56,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; -import org.apache.hadoop.hdfs.protocol.datatransfer.TrustedChannelResolver; -import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataTransferSaslUtil; import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.DataEncryptionKeyFactory; -import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferClient; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; @@ -161,7 +155,6 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { private List snapshottableDirs = null; private final BlockPlacementPolicy bpPolicy; - private final SaslDataTransferClient saslClient; /** * Filesystem checker. 
@@ -188,12 +181,6 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { networktopology, namenode.getNamesystem().getBlockManager().getDatanodeManager() .getHost2DatanodeMap()); - this.saslClient = new SaslDataTransferClient( - DataTransferSaslUtil.getSaslPropertiesResolver(conf), - TrustedChannelResolver.getInstance(conf), - conf.getBoolean( - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, - IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_DEFAULT)); for (Iterator it = pmap.keySet().iterator(); it.hasNext();) { String key = it.next(); @@ -594,7 +581,7 @@ private void copyBlocksToLostFound(String parent, HdfsFileStatus file, * bad. Both places should be refactored to provide a method to copy blocks * around. */ - private void copyBlock(DFSClient dfs, LocatedBlock lblock, + private void copyBlock(final DFSClient dfs, LocatedBlock lblock, OutputStream fos) throws Exception { int failures = 0; InetSocketAddress targetAddr = null; @@ -647,8 +634,9 @@ public Peer newConnectedPeer(InetSocketAddress addr, try { s.connect(addr, HdfsServerConstants.READ_TIMEOUT); s.setSoTimeout(HdfsServerConstants.READ_TIMEOUT); - peer = TcpPeerServer.peerFromSocketAndKey(saslClient, s, - NamenodeFsck.this, blockToken, datanodeId); + peer = TcpPeerServer.peerFromSocketAndKey( + dfs.getSaslDataTransferClient(), s, NamenodeFsck.this, + blockToken, datanodeId); } finally { if (peer == null) { IOUtils.closeQuietly(s); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java index a5758a7e0e48a..0bdd2a58217f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java @@ -17,5 +17,20 @@ */ package 
org.apache.hadoop.hdfs.server.namenode; -public class RetryStartFileException extends Exception { +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; + +@InterfaceAudience.Private +public class RetryStartFileException extends IOException { + private static final long serialVersionUID = 1L; + + public RetryStartFileException() { + super("Preconditions for creating a file failed because of a " + + "transient error, retry create later."); + } + + public RetryStartFileException(String s) { + super(s); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java index 237f9d3d5ee25..79dabb33899e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/XAttrPermissionFilter.java @@ -28,6 +28,8 @@ import com.google.common.collect.Lists; import com.google.common.base.Preconditions; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; + /** * There are four types of extended attributes defined by the * following namespaces: @@ -70,6 +72,15 @@ static void checkPermissionForApi(FSPermissionChecker pc, XAttr xAttr, isRawPath && isSuperUser) { return; } + if (XAttrHelper.getPrefixName(xAttr). + equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { + if (xAttr.getValue() != null) { + throw new AccessControlException("Attempt to set a value for '" + + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + + "'. 
Values are not allowed for this xattr."); + } + return; + } throw new AccessControlException("User doesn't have permission for xattr: " + XAttrHelper.getPrefixName(xAttr)); } @@ -104,6 +115,9 @@ static List filterXAttrsForApi(FSPermissionChecker pc, } else if (xAttr.getNameSpace() == XAttr.NameSpace.RAW && isSuperUser && isRawPath) { filteredXAttrs.add(xAttr); + } else if (XAttrHelper.getPrefixName(xAttr). + equals(SECURITY_XATTR_UNREADABLE_BY_SUPERUSER)) { + filteredXAttrs.add(xAttr); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java index 3c0edfd8f4a8f..a0aa10bf68f03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java @@ -18,12 +18,16 @@ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.util.concurrent.atomic.AtomicBoolean; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.retry.FailoverProxyProvider; public abstract class AbstractNNFailoverProxyProvider implements FailoverProxyProvider { + protected AtomicBoolean fallbackToSimpleAuth; + /** * Inquire whether logical HA URI is used for the implementation. If it is * used, a special token handling may be needed to make sure a token acquired @@ -32,4 +36,16 @@ public abstract class AbstractNNFailoverProxyProvider implements * @return true if logical HA URI is used. false, if not used. */ public abstract boolean useLogicalURI(); + + /** + * Set for tracking if a secure client falls back to simple auth. This method + * is synchronized only to stifle a Findbugs warning. 
+ * + * @param fallbackToSimpleAuth - set to true or false during this method to + * indicate if a secure client falls back to simple auth + */ + public synchronized void setFallbackToSimpleAuth( + AtomicBoolean fallbackToSimpleAuth) { + this.fallbackToSimpleAuth = fallbackToSimpleAuth; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 4d196a2adefcf..06aa8fafcd057 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -122,7 +122,7 @@ public synchronized ProxyInfo getProxy() { if (current.namenode == null) { try { current.namenode = NameNodeProxies.createNonHAProxy(conf, - current.address, xface, ugi, false).getProxy(); + current.address, xface, ugi, false, fallbackToSimpleAuth).getProxy(); } catch (IOException e) { LOG.error("Failed to create RPC proxy to NameNode", e); throw new RuntimeException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index 3f4cda534afb7..ff332253c80c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -220,7 +220,8 @@ private void loadFileDiffList(InputStream in, INodeFile file, int size) copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() .toByteArray(), permission, 
acl, fileInPb.getModificationTime(), fileInPb.getAccessTime(), (short) fileInPb.getReplication(), - fileInPb.getPreferredBlockSize(), xAttrs); + fileInPb.getPreferredBlockSize(), + (byte)fileInPb.getStoragePolicyID(), xAttrs); } FileDiff diff = new FileDiff(pbf.getSnapshotId(), copy, null, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 3949fbdc53940..f8c0fc265fc99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -26,7 +26,6 @@ import java.net.URI; import java.net.URISyntaxException; import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; import java.util.EnumSet; import java.util.HashSet; import java.util.List; @@ -223,11 +222,8 @@ static DatanodeInfo chooseDatanode(final NameNode namenode, final DatanodeDescriptor clientNode = bm.getDatanodeManager( ).getDatanodeByHost(getRemoteAddress()); if (clientNode != null) { - final DatanodeStorageInfo[] storages = bm.getBlockPlacementPolicy() - .chooseTarget(path, 1, clientNode, - new ArrayList(), false, excludes, blocksize, - // TODO: get storage type from the file - StorageType.DEFAULT); + final DatanodeStorageInfo[] storages = bm.chooseTarget4WebHDFS( + path, clientNode, excludes, blocksize); if (storages.length > 0) { return storages[0].getDatanodeDescriptor(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 13ccae5d1b2d9..556eca677a18f 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.tools; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; @@ -43,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.shell.Command; import org.apache.hadoop.fs.shell.CommandFormat; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -58,23 +60,24 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.RollingUpgradeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; import org.apache.hadoop.hdfs.protocol.SnapshotException; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.ipc.GenericRefreshProtocol; import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RefreshCallQueueProtocol; -import org.apache.hadoop.ipc.GenericRefreshProtocol; import org.apache.hadoop.ipc.RefreshResponse; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolClientSideTranslatorPB; +import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolPB; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.SecurityUtil; import 
org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; -import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolClientSideTranslatorPB; -import org.apache.hadoop.ipc.protocolPB.GenericRefreshProtocolPB; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; @@ -384,6 +387,8 @@ static int run(DistributedFileSystem dfs, String[] argv, int idx) throws IOExcep "\t[-shutdownDatanode [upgrade]]\n" + "\t[-getDatanodeInfo ]\n" + "\t[-metasave filename]\n" + + "\t[-setStoragePolicy path policyName]\n" + + "\t[-getStoragePolicy path]\n" + "\t[-help [cmd]]\n"; /** @@ -589,6 +594,32 @@ private boolean waitExitSafeMode(ClientProtocol nn, boolean inSafeMode) return inSafeMode; } + public int setStoragePolicy(String[] argv) throws IOException { + DistributedFileSystem dfs = getDFS(); + dfs.setStoragePolicy(new Path(argv[1]), argv[2]); + System.out.println("Set storage policy " + argv[2] + " on " + argv[1]); + return 0; + } + + public int getStoragePolicy(String[] argv) throws IOException { + DistributedFileSystem dfs = getDFS(); + HdfsFileStatus status = dfs.getClient().getFileInfo(argv[1]); + if (status == null) { + throw new FileNotFoundException("File/Directory does not exist: " + + argv[1]); + } + byte storagePolicyId = status.getStoragePolicy(); + BlockStoragePolicy.Suite suite = BlockStoragePolicy + .readBlockStorageSuite(getConf()); + BlockStoragePolicy policy = suite.getPolicy(storagePolicyId); + if (policy != null) { + System.out.println("The storage policy of " + argv[1] + ":\n" + policy); + return 0; + } else { + throw new IOException("Cannot identify the storage policy for " + argv[1]); + } + } + /** * Allow snapshot on a directory. * Usage: java DFSAdmin -allowSnapshot snapshotDir @@ -930,7 +961,13 @@ private void printHelp(String cmd) { String getDatanodeInfo = "-getDatanodeInfo \n" + "\tGet the information about the given datanode. 
This command can\n" + "\tbe used for checking if a datanode is alive.\n"; - + + String setStoragePolicy = "-setStoragePolicy path policyName\n" + + "\tSet the storage policy for a file/directory.\n"; + + String getStoragePolicy = "-getStoragePolicy path\n" + + "\tGet the storage policy for a file/directory.\n"; + String help = "-help [cmd]: \tDisplays help for the given command or all commands if none\n" + "\t\tis specified.\n"; @@ -988,6 +1025,10 @@ private void printHelp(String cmd) { System.out.println(shutdownDatanode); } else if ("getDatanodeInfo".equalsIgnoreCase(cmd)) { System.out.println(getDatanodeInfo); + } else if ("setStoragePolicy".equalsIgnoreCase(cmd)) { + System.out.println(setStoragePolicy); + } else if ("getStoragePolicy".equalsIgnoreCase(cmd)) { + System.out.println(getStoragePolicy); } else if ("help".equals(cmd)) { System.out.println(help); } else { @@ -1019,6 +1060,8 @@ private void printHelp(String cmd) { System.out.println(disallowSnapshot); System.out.println(shutdownDatanode); System.out.println(getDatanodeInfo); + System.out.println(setStoragePolicy); + System.out.println(getStoragePolicy); System.out.println(help); System.out.println(); ToolRunner.printGenericCommandUsage(System.out); @@ -1378,6 +1421,12 @@ private static void printUsage(String cmd) { } else if ("-safemode".equals(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-safemode enter | leave | get | wait]"); + } else if ("-setStoragePolicy".equals(cmd)) { + System.err.println("Usage: java DFSAdmin" + + " [-setStoragePolicy path policyName]"); + } else if ("-getStoragePolicy".equals(cmd)) { + System.err.println("Usage: java DFSAdmin" + + " [-getStoragePolicy path]"); } else if ("-allowSnapshot".equalsIgnoreCase(cmd)) { System.err.println("Usage: hdfs dfsadmin" + " [-allowSnapshot ]"); @@ -1586,6 +1635,16 @@ public int run(String[] argv) throws Exception { printUsage(cmd); return exitCode; } + } else if ("-setStoragePolicy".equals(cmd)) { + if (argv.length != 3) { + 
printUsage(cmd); + return exitCode; + } + } else if ("-getStoragePolicy".equals(cmd)) { + if (argv.length != 2) { + printUsage(cmd); + return exitCode; + } } // initialize DFSAdmin @@ -1657,6 +1716,10 @@ public int run(String[] argv) throws Exception { exitCode = shutdownDatanode(argv, i); } else if ("-getDatanodeInfo".equals(cmd)) { exitCode = getDatanodeInfo(argv, i); + } else if ("-setStoragePolicy".equals(cmd)) { + exitCode = setStoragePolicy(argv); + } else if ("-getStoragePolicy".equals(cmd)) { + exitCode = getStoragePolicy(argv); } else if ("-help".equals(cmd)) { if (i < argv.length) { printHelp(argv[i]); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java index 8bdea1fd59e32..8a8e61fe31dc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/EnumCounters.java @@ -105,6 +105,15 @@ public final void subtract(final EnumCounters that) { this.counters[i] -= that.counters[i]; } } + + /** @return the sum of all counters. 
*/ + public final long sum() { + long sum = 0; + for(int i = 0; i < counters.length; i++) { + sum += counters[i]; + } + return sum; + } @Override public boolean equals(Object obj) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 321630c18dc1a..2b3d7e66ce4ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.protocol.*; @@ -180,9 +181,16 @@ private static String toString(final FsPermission permission) { } /** Convert a string to a FsPermission object. */ - private static FsPermission toFsPermission(final String s, Boolean aclBit) { + private static FsPermission toFsPermission(final String s, Boolean aclBit, + Boolean encBit) { FsPermission perm = new FsPermission(Short.parseShort(s, 8)); - return (aclBit != null && aclBit) ? new FsAclPermission(perm) : perm; + final boolean aBit = (aclBit != null) ? aclBit : false; + final boolean eBit = (encBit != null) ? 
encBit : false; + if (aBit || eBit) { + return new FsPermissionExtension(perm, aBit, eBit); + } else { + return perm; + } } static enum PathType { @@ -214,12 +222,16 @@ public static String toJsonString(final HdfsFileStatus status, if (perm.getAclBit()) { m.put("aclBit", true); } + if (perm.getEncryptedBit()) { + m.put("encBit", true); + } m.put("accessTime", status.getAccessTime()); m.put("modificationTime", status.getModificationTime()); m.put("blockSize", status.getBlockSize()); m.put("replication", status.getReplication()); m.put("fileId", status.getFileId()); m.put("childrenNum", status.getChildrenNum()); + m.put("storagePolicy", status.getStoragePolicy()); return includeType ? toJsonString(FileStatus.class, m): JSON.toString(m); } @@ -240,7 +252,7 @@ public static HdfsFileStatus toFileStatus(final Map json, boolean includes final String owner = (String) m.get("owner"); final String group = (String) m.get("group"); final FsPermission permission = toFsPermission((String) m.get("permission"), - (Boolean)m.get("aclBit")); + (Boolean)m.get("aclBit"), (Boolean)m.get("encBit")); final long aTime = (Long) m.get("accessTime"); final long mTime = (Long) m.get("modificationTime"); final long blockSize = (Long) m.get("blockSize"); @@ -250,10 +262,12 @@ public static HdfsFileStatus toFileStatus(final Map json, boolean includes Long childrenNumLong = (Long) m.get("childrenNum"); final int childrenNum = (childrenNumLong == null) ? -1 : childrenNumLong.intValue(); + final byte storagePolicy = m.containsKey("storagePolicy") ? 
+ (byte) (long) (Long) m.get("storagePolicy") : + BlockStoragePolicy.ID_UNSPECIFIED; return new HdfsFileStatus(len, type == PathType.DIRECTORY, replication, - blockSize, mTime, aTime, permission, owner, group, - symlink, DFSUtil.string2Bytes(localName), fileId, childrenNum, - null); + blockSize, mTime, aTime, permission, owner, group, symlink, + DFSUtil.string2Bytes(localName), fileId, childrenNum, null, storagePolicy); } /** Convert an ExtendedBlock to a Json map. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c index c382b9a34ed97..ebdcad3e383ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.c @@ -79,6 +79,15 @@ struct hdfsFile_internal { int flags; }; +#define HDFS_EXTENDED_FILE_INFO_ENCRYPTED 0x1 + +/** + * Extended file information. + */ +struct hdfsExtendedFileInfo { + int flags; +}; + int hdfsFileIsOpenForRead(hdfsFile file) { return (file->type == HDFS_STREAM_INPUT); @@ -2836,9 +2845,39 @@ tOffset hdfsGetUsed(hdfsFS fs) } return jVal.j; } + +/** + * We cannot add new fields to the hdfsFileInfo structure because it would break + * binary compatibility. The reason is because we return an array + * of hdfsFileInfo structures from hdfsListDirectory. So changing the size of + * those structures would break all programs that relied on finding the second + * element in the array at + sizeof(struct hdfsFileInfo). + * + * So instead, we add the new fields to the hdfsExtendedFileInfo structure. + * This structure is contained in the mOwner string found inside the + * hdfsFileInfo. Specifically, the format of mOwner is: + * + * [owner-string] [null byte] [padding] [hdfsExtendedFileInfo structure] + * + * The padding is added so that the hdfsExtendedFileInfo structure starts on an + * 8-byte boundary. + * + * @param str The string to locate the extended info in. 
+ * @return The offset of the hdfsExtendedFileInfo structure. + */ +static size_t getExtendedFileInfoOffset(const char *str) +{ + int num_64_bit_words = ((strlen(str) + 1) + 7) / 8; + return num_64_bit_words * 8; +} +static struct hdfsExtendedFileInfo *getExtendedFileInfo(hdfsFileInfo *fileInfo) +{ + char *owner = fileInfo->mOwner; + return (struct hdfsExtendedFileInfo *)(owner + + getExtendedFileInfoOffset(owner)); +} - static jthrowable getFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo) { @@ -2852,6 +2891,8 @@ getFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo) const char *cPathName; const char *cUserName; const char *cGroupName; + struct hdfsExtendedFileInfo *extInfo; + size_t extOffset; jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT, "isDir", "()Z"); @@ -2926,9 +2967,24 @@ getFileInfoFromStat(JNIEnv *env, jobject jStat, hdfsFileInfo *fileInfo) jthr = getPendingExceptionAndClear(env); goto done; } - fileInfo->mOwner = strdup(cUserName); + extOffset = getExtendedFileInfoOffset(cUserName); + fileInfo->mOwner = malloc(extOffset + sizeof(struct hdfsExtendedFileInfo)); + if (!fileInfo->mOwner) { + jthr = newRuntimeError(env, "getFileInfo: OOM allocating mOwner"); + goto done; + } + strcpy(fileInfo->mOwner, cUserName); (*env)->ReleaseStringUTFChars(env, jUserName, cUserName); - + extInfo = getExtendedFileInfo(fileInfo); + memset(extInfo, 0, sizeof(*extInfo)); + jthr = invokeMethod(env, &jVal, INSTANCE, jStat, + HADOOP_STAT, "isEncrypted", "()Z"); + if (jthr) { + goto done; + } + if (jVal.z == JNI_TRUE) { + extInfo->flags |= HDFS_EXTENDED_FILE_INFO_ENCRYPTED; + } jthr = invokeMethod(env, &jVal, INSTANCE, jStat, HADOOP_STAT, "getGroup", "()Ljava/lang/String;"); if (jthr) @@ -3174,6 +3230,13 @@ void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries) free(hdfsFileInfo); } +int hdfsFileIsEncrypted(hdfsFileInfo *fileInfo) +{ + struct hdfsExtendedFileInfo *extInfo; + + extInfo = getExtendedFileInfo(fileInfo); + 
return !!(extInfo->flags & HDFS_EXTENDED_FILE_INFO_ENCRYPTED); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.h b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.h index 270db81193131..0625da3759193 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.h +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/hdfs.h @@ -592,6 +592,14 @@ extern "C" { */ void hdfsFreeFileInfo(hdfsFileInfo *hdfsFileInfo, int numEntries); + /** + * hdfsFileIsEncrypted: determine if a file is encrypted based on its + * hdfsFileInfo. + * @return -1 if there was an error (errno will be set), 0 if the file is + * not encrypted, 1 if the file is encrypted. + */ + int hdfsFileIsEncrypted(hdfsFileInfo *hdfsFileInfo); + /** * hdfsGetHosts - Get hostnames where a particular block (determined by diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c index cf605e3e2dfa1..64c1a8f86343b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/libhdfs/test_libhdfs_threaded.c @@ -217,6 +217,7 @@ static int doTestHdfsOperations(struct tlhThreadInfo *ti, hdfsFS fs, fileInfo = hdfsGetPathInfo(fs, paths->file2); EXPECT_NONNULL(fileInfo); EXPECT_ZERO(strcmp("doop", fileInfo->mGroup)); + EXPECT_ZERO(hdfsFileIsEncrypted(fileInfo)); hdfsFreeFileInfo(fileInfo, 1); EXPECT_ZERO(hdfsChown(fs, paths->file2, "ha", "doop2")); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto index f1673ffef43cf..082e5bd81458b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto @@ -100,6 +100,14 @@ message 
SetReplicationResponseProto { required bool result = 1; } +message SetStoragePolicyRequestProto { + required string src = 1; + required string policyName = 2; +} + +message SetStoragePolicyResponseProto { // void response +} + message SetPermissionRequestProto { required string src = 1; required FsPermissionProto permission = 2; @@ -689,6 +697,8 @@ service ClientNamenodeProtocol { rpc append(AppendRequestProto) returns(AppendResponseProto); rpc setReplication(SetReplicationRequestProto) returns(SetReplicationResponseProto); + rpc setStoragePolicy(SetStoragePolicyRequestProto) + returns(SetStoragePolicyResponseProto); rpc setPermission(SetPermissionRequestProto) returns(SetPermissionResponseProto); rpc setOwner(SetOwnerRequestProto) returns(SetOwnerResponseProto); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/encryption.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/encryption.proto index 1a33cdcf362fb..c4b7009105c35 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/encryption.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/encryption.proto @@ -46,9 +46,10 @@ message ListEncryptionZonesRequestProto { } message EncryptionZoneProto { - required string path = 1; - required string keyName = 2; - required int64 id = 3; + required int64 id = 1; + required string path = 2; + required CipherSuite suite = 3; + required string keyName = 4; } message ListEncryptionZonesResponseProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto index 29fcd36500211..588f6c8612296 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto @@ -138,6 +138,7 @@ message INodeSection { optional FileUnderConstructionFeature fileUC = 7; optional AclFeatureProto acl = 8; optional XAttrFeatureProto xAttrs = 9; + optional uint32 storagePolicyID = 10; } message INodeDirectory { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto index a410224f02e68..d1ba68f6dca90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/hdfs.proto @@ -158,6 +158,7 @@ message FsPermissionProto { enum StorageTypeProto { DISK = 1; SSD = 2; + ARCHIVE = 3; } /** @@ -215,7 +216,27 @@ message FileEncryptionInfoProto { required CipherSuite suite = 1; required bytes key = 2; required bytes iv = 3; - required string ezKeyVersionName = 4; + required string keyName = 4; + required string ezKeyVersionName = 5; +} + +/** + * Encryption information for an individual + * file within an encryption zone + */ +message PerFileEncryptionInfoProto { + required bytes key = 1; + required bytes iv = 2; + required string ezKeyVersionName = 3; +} + +/** + * Encryption information for an encryption + * zone + */ +message ZoneEncryptionInfoProto { + required CipherSuite suite = 1; + required string keyName = 2; } /** @@ -263,6 +284,8 @@ message HdfsFileStatusProto { // Optional field for file encryption optional FileEncryptionInfoProto fileEncryptionInfo = 15; + + optional uint32 storagePolicy = 16 [default = 0]; // block storage policy id } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml new file mode 100644 index 0000000000000..891909b6b405e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/blockStoragePolicy-default.xml @@ -0,0 +1,118 @@ + + + + + + + + + + + + dfs.block.storage.policies + HOT:12, WARM:8, COLD:4 + + A list of block storage policy names and IDs. The syntax is + + NAME_1:ID_1, NAME_2:ID_2, ..., NAME_n:ID_n + + where ID is an integer in the range [1,15] and NAME is case insensitive. + The first element is the default policy. Empty list is not allowed. 
+ + + + + + dfs.block.storage.policy.12 + DISK + + A list of storage types for storing the block replicas such as + + STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n + + When creating a block, the i-th replica is stored using i-th storage type + for i less than or equal to n, and + the j-th replica is stored using n-th storage type for j greater than n. + + Empty list is not allowed. + + Examples: + DISK : all replicas stored using DISK. + DISK, ARCHIVE : the first replica is stored using DISK and all the + remaining replicas are stored using ARCHIVE. + + + + + dfs.block.storage.policy.creation-fallback.12 + + + A list of storage types for creation fallback storage. + + STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n + + When creating a block, if a particular storage type specified in the policy + is unavailable, the fallback STORAGE_TYPE_1 is used. Further, if + STORAGE_TYPE_i is also unavailable, the fallback STORAGE_TYPE_(i+1) is used. + In case that all fallback storages are unavailabe, the block will be created + with number of replicas less than the specified replication factor. + + An empty list indicates that there is no fallback storage. + + + + + dfs.block.storage.policy.replication-fallback.12 + ARCHIVE + + Similar to dfs.block.storage.policy.creation-fallback.x but for replication. 
+ + + + + + dfs.block.storage.policy.8 + DISK, ARCHIVE + + + + dfs.block.storage.policy.creation-fallback.8 + DISK, ARCHIVE + + + + dfs.block.storage.policy.replication-fallback.8 + DISK, ARCHIVE + + + + + dfs.block.storage.policy.4 + ARCHIVE + + + + dfs.block.storage.policy.creation-fallback.4 + + + + + dfs.block.storage.policy.replication-fallback.4 + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 4c2737995e9c0..d404c1c1e0a9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -22,7 +22,8 @@ - + + hadoop.hdfs.configuration.version @@ -2068,19 +2069,6 @@ - - dfs.namenode.randomize-block-locations-per-block - false - When fetching replica locations of a block, the replicas - are sorted based on network distance. This configuration parameter - determines whether the replicas at the same network distance are randomly - shuffled. By default, this is false, such that repeated requests for a block's - replicas always result in the same order. This potentially improves page cache - behavior. However, for some network topologies, it is desirable to shuffle this - order for better load balancing. - - - dfs.datanode.block.id.layout.upgrade.threads 12 @@ -2137,4 +2125,12 @@ + + dfs.encryption.key.provider.uri + + The KeyProvider to use when interacting with encryption keys used + when reading and writing to an encryption zone. 
+ + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm new file mode 100644 index 0000000000000..5301d52f32bc9 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ArchivalStorage.apt.vm @@ -0,0 +1,302 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + HDFS Archival Storage + --- + --- + ${maven.build.timestamp} + +HDFS Archival Storage + +%{toc|section=1|fromDepth=0} + +* {Introduction} + + is a solution to decouple growing storage capacity from compute capacity. + Nodes with higher density and less expensive storage with low compute power are becoming available + and can be used as cold storage in the clusters. + Based on policy the data from hot can be moved to the cold. + Adding more nodes to the cold storage can grow the storage independent of the compute capacity + in the cluster. + +* {Storage Types and Storage Policies} + +** {Storage Types: DISK, SSD and ARCHIVE} + + The first phase of + {{{https://issues.apache.org/jira/browse/HDFS-2832}Heterogeneous Storage (HDFS-2832)}} + changed datanode storage model from a single storage, + which may correspond to multiple physical storage medias, + to a collection of storages with each storage corresponding to a physical storage media. + It also added the notion of storage types, DISK and SSD, + where DISK is the default storage type. 
+ + A new storage type , + which has high storage density (petabyte of storage) but little compute power, + is added for supporting archival storage. + +** {Storage Policies: Hot, Warm and Cold} + + A new concept of storage policies is introduced in order to allow files to be stored + in different storage types according to the storage policy. + + We have the following storage policies: + + * <> - for both storage and compute. + The data that is popular and still being used for processing will stay in this policy. + When a block is hot, all replicas are stored in DISK. + + * <> - only for storage with limited compute. + The data that is no longer being used, or data that needs to be archived is moved + from hot storage to cold storage. + When a block is cold, all replicas are stored in ARCHIVE. + + * <> - partially hot and partially cold. + When a block is warm, some of its replicas are stored in DISK + and the remaining replicas are stored in ARCHIVE. + + [] + + More formally, a storage policy consists of the following fields: + + [[1]] Policy ID + + [[2]] Policy name + + [[3]] A list of storage types for block placement + + [[4]] A list of fallback storage types for file creation + + [[5]] A list of fallback storage types for replication + + [] + + When there is enough space, + block replicas are stored according to the storage type list specified in #3. + When some of the storage types in list #3 are running out of space, + the fallback storage type lists specified in #4 and #5 are used + to replace the out-of-space storage types for file creation and replication, respectively. + + The following is a typical storage policy table. 
+ +*--------+---------------+-------------------------+-----------------------+-----------------------+ +| <> | <>| <> | <> | <> | +| <> | <> | <<(n\ replicas)>> | <> | <> | +*--------+---------------+-------------------------+-----------------------+-----------------------+ +| 12 | Hot (default) | DISK: | \ | ARCHIVE | +*--------+---------------+-------------------------+-----------------------+-----------------------+ +| 8 | Warm | DISK: 1, ARCHIVE: -1 | ARCHIVE, DISK | ARCHIVE, DISK | +*--------+---------------+-------------------------+-----------------------+-----------------------+ +| 4 | Cold | ARCHIVE: | \ | \ | +*--------+---------------+-------------------------+-----------------------+-----------------------+ + + Note that cluster administrators may change the storage policy table + according to the characteristic of the cluster. + For example, in order to prevent losing archival data, + administrators may want to use DISK as fallback storage for replication in the Cold policy. + A drawback of such setting is that the DISK storages could be filled up with archival data. + As a result, the entire cluster may become full and cannot serve hot data anymore. + +** {Configurations} + +*** {Setting The List of All Storage Policies} + + * <> + - a list of block storage policy names and IDs. + The syntax is + + NAME_1:ID_1, NAME_2:ID_2, ..., NAME_:ID_ + + where ID is an integer in the closed range [1,15] and NAME is case insensitive. + The first element is the . Empty list is not allowed. + + The default value is shown below. + ++------------------------------------------+ + + dfs.block.storage.policies + HOT:12, WARM:8, COLD:4 + ++------------------------------------------+ + + [] + +*** {Setting Storage Policy Details} + + The following configuration properties are for setting the details of each storage policy, + where <<<\>>> is the actual policy ID. + + * <>> + - a list of storage types for storing the block replicas. 
+ The syntax is + + STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_ + + When creating a block, the -th replica is stored using -th storage type + for less than or equal to , and + the -th replica is stored using -th storage type for greater than . + + Empty list is not allowed. + + Examples: + ++------------------------------------------+ +DISK : all replicas stored using DISK. +DISK, ARCHIVE : the first replica is stored using DISK and all the + remaining replicas are stored using ARCHIVE. ++------------------------------------------+ + + * <>> + - a list of storage types for creation fallback storage. + The syntax is + + STORAGE_TYPE_1, STORAGE_TYPE_2, ..., STORAGE_TYPE_n + + When creating a block, if a particular storage type specified in the policy + is unavailable, the fallback STORAGE_TYPE_1 is used. Further, if + STORAGE_TYPE_ is also unavailable, the fallback STORAGE_TYPE_<(i+1)> is used. + In case all fallback storages are unavailable, the block will be created + with number of replicas less than the specified replication factor. + + An empty list indicates that there is no fallback storage. + + * <>> + - a list of storage types for replication fallback storage. + The usage of this configuration property is similar to + <<>>> + except that it takes effect on replication but not block creation. + + [] + + The following are the default configuration values for Hot, Warm and Cold storage policies. 
+ + * Block Storage Policy <> + ++------------------------------------------+ + + dfs.block.storage.policy.12 + DISK + + + dfs.block.storage.policy.creation-fallback.12 + + + + dfs.block.storage.policy.replication-fallback.12 + ARCHIVE + ++------------------------------------------+ + + * Block Storage Policy <> + ++------------------------------------------+ + + dfs.block.storage.policy.8 + DISK, ARCHIVE + + + dfs.block.storage.policy.creation-fallback.8 + DISK, ARCHIVE + + + dfs.block.storage.policy.replication-fallback.8 + DISK, ARCHIVE + ++------------------------------------------+ + + * Block Storage Policy <> + ++------------------------------------------+ + + dfs.block.storage.policy.4 + ARCHIVE + + + dfs.block.storage.policy.creation-fallback.4 + + + + dfs.block.storage.policy.replication-fallback.4 + + ++------------------------------------------+ + + [] + +* {Mover - A New Data Migration Tool} + + A new data migration tool is added for archiving data. + The tool is similar to Balancer. + It periodically scans the files in HDFS to check if the block placement satisfies the storage policy. + For the blocks violating the storage policy, + it moves the replicas to a different storage type + in order to fulfill the storage policy requirement. + + * Command: + ++------------------------------------------+ +hdfs mover [-p | -f ] ++------------------------------------------+ + + * Arguments: + +*-------------------------+--------------------------------------------------------+ +| <<<-p \>>> | Specify a space separated list of HDFS files/dirs to migrate. +*-------------------------+--------------------------------------------------------+ +| <<<-f \>>> | Specify a local file containing a list of HDFS files/dirs to migrate. +*-------------------------+--------------------------------------------------------+ + + Note that, when both -p and -f options are omitted, the default path is the root directory. 
+ + [] + + +* {<<>> Commands} + +** {Set Storage Policy} + + Set a storage policy to a file or a directory. + + * Command: + ++------------------------------------------+ +hdfs dfsadmin -setStoragePolicy ++------------------------------------------+ + + * Arguments: + +*----------------------+-----------------------------------------------------+ +| <<<\>>> | The path referring to either a directory or a file. | +*----------------------+-----------------------------------------------------+ +| <<<\>>> | The name of the storage policy. | +*----------------------+-----------------------------------------------------+ + + [] + +** {Get Storage Policy} + + Get the storage policy of a file or a directory. + + * Command: + ++------------------------------------------+ +hdfs dfsadmin -getStoragePolicy ++------------------------------------------+ + + * Arguments: + +*----------------------+-----------------------------------------------------+ +| <<<\>>> | The path referring to either a directory or a file. | +*----------------------+-----------------------------------------------------+ + + [] diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ExtendedAttributes.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ExtendedAttributes.apt.vm index 0a99fe50ee65c..109e988a8636d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ExtendedAttributes.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ExtendedAttributes.apt.vm @@ -38,7 +38,7 @@ Extended Attributes in HDFS The <<>> namespace is reserved for internal HDFS use. This namespace is not accessible through userspace methods, and is reserved for implementing internal HDFS features. - The <<>> namespace is reserved for internal HDFS use. This namespace is not accessible through userspace methods. It is currently unused. + The <<>> namespace is reserved for internal HDFS use. This namespace is generally not accessible through userspace methods. One particular use of <<>> is the <<>> extended attribute. 
This xattr can only be set on files, and it will prevent the superuser from reading the file's contents. The superuser can still read and modify file metadata, such as the owner, permissions, etc. This xattr can be set and accessed by any user, assuming normal filesystem permissions. This xattr is also write-once, and cannot be removed once set. This xattr does not allow a value to be set. The <<>> namespace is reserved for internal system attributes that sometimes need to be exposed. Like <<>> namespace attributes they are not visible to the user except when <<>>/<<>> is called on a file or directory in the <<>> HDFS directory hierarchy. These attributes can only be accessed by the superuser. An example of where <<>> namespace extended attributes are used is the <<>> utility. Encryption zone meta data is stored in <<>> extended attributes, so as long as the administrator uses <<>> pathnames in source and target, the encrypted files in the encryption zones are transparently copied. @@ -82,7 +82,6 @@ Extended Attributes in HDFS * {Configuration options} - HDFS supports extended attributes out of the box, without additional configuration. Administrators could potentially be interested in the options limiting the number of xattrs per inode and the size of xattrs, since xattrs increase the on-disk and in-memory space consumption of an inode. * <<>> diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm index 6eb60f0a1b5da..170f352890cb8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HDFSCommands.apt.vm @@ -147,18 +147,19 @@ HDFS Commands Guide *-----------------+-----------------------------------------------------------+ | -regular | Normal datanode startup (default). 
*-----------------+-----------------------------------------------------------+ -| -rollback | Rollsback the datanode to the previous version. This should +| -rollback | Rollback the datanode to the previous version. This should | | be used after stopping the datanode and distributing the | | old hadoop version. *-----------------+-----------------------------------------------------------+ -| -rollingupgrade rollback | Rollsback a rolling upgrade operation. +| -rollingupgrade rollback | Rollback a rolling upgrade operation. *-----------------+-----------------------------------------------------------+ ** <<>> Runs a HDFS dfsadmin client. - Usage: <<...] [-setSpaceQuota ...] [-clrSpaceQuota ...] + [-setStoragePolicy ] + [-getStoragePolicy ] [-finalizeUpgrade] [-rollingUpgrade [||]] [-metasave filename] @@ -186,7 +189,8 @@ HDFS Commands Guide [-fetchImage ] [-shutdownDatanode [upgrade]] [-getDatanodeInfo ] - [-help [cmd]]>>> + [-help [cmd]] ++------------------------------------------+ *-----------------+-----------------------------------------------------------+ || COMMAND_OPTION || Description @@ -236,6 +240,10 @@ HDFS Commands Guide | {{{../hadoop-hdfs/HdfsQuotaAdminGuide.html#Administrative_Commands}HDFS Quotas Guide}} | for the detail. *-----------------+-----------------------------------------------------------+ +| -setStoragePolicy \ \ | Set a storage policy to a file or a directory. +*-----------------+-----------------------------------------------------------+ +| -getStoragePolicy \ | Get the storage policy of a file or a directory. +*-----------------+-----------------------------------------------------------+ | -finalizeUpgrade| Finalize upgrade of HDFS. Datanodes delete their previous | version working directories, followed by Namenode doing the | same. This completes the upgrade process. @@ -250,7 +258,7 @@ HDFS Commands Guide | will contain one line for each of the following\ | 1. Datanodes heart beating with Namenode\ | 2. 
Blocks waiting to be replicated\ - | 3. Blocks currrently being replicated\ + | 3. Blocks currently being replicated\ | 4. Blocks waiting to be deleted *-----------------+-----------------------------------------------------------+ | -refreshServiceAcl | Reload the service-level authorization policy file. @@ -312,12 +320,30 @@ HDFS Commands Guide | is specified. *-----------------+-----------------------------------------------------------+ +** <<>> + + Runs the data migration utility. + See {{{./ArchivalStorage.html#Mover_-_A_New_Data_Migration_Tool}Mover}} for more details. + + Usage: << | -f ]>>> + +*--------------------+--------------------------------------------------------+ +|| COMMAND_OPTION || Description +*--------------------+--------------------------------------------------------+ +| -p \ | Specify a space separated list of HDFS files/dirs to migrate. +*--------------------+--------------------------------------------------------+ +| -f \ | Specify a local file containing a list of HDFS files/dirs to migrate. +*--------------------+--------------------------------------------------------+ + + Note that, when both -p and -f options are omitted, the default path is the root directory. + ** <<>> Runs the namenode. More info about the upgrade, rollback and finalize is at {{{./HdfsUserGuide.html#Upgrade_and_Rollback}Upgrade Rollback}}. - Usage: <<] ] | @@ -329,7 +355,8 @@ HDFS Commands Guide [-initializeSharedEdits] | [-bootstrapStandby] | [-recover [-force] ] | - [-metadataVersion ]>>> + [-metadataVersion ] ++------------------------------------------+ *--------------------+--------------------------------------------------------+ || COMMAND_OPTION || Description @@ -351,7 +378,7 @@ HDFS Commands Guide | -upgradeOnly [-clusterid cid] [-renameReserved\] | Upgrade the | specified NameNode and then shutdown it. *--------------------+--------------------------------------------------------+ -| -rollback | Rollsback the NameNode to the previous version. 
This +| -rollback | Rollback the NameNode to the previous version. This | should be used after stopping the cluster and | distributing the old Hadoop version. *--------------------+--------------------------------------------------------+ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm index 3689a775efede..0e2cb783b6ad9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm @@ -85,6 +85,12 @@ Transparent Encryption in HDFS A necessary prerequisite is an instance of the KMS, as well as a backing key store for the KMS. See the {{{../../hadoop-kms/index.html}KMS documentation}} for more information. +** Configuring the cluster KeyProvider + +*** dfs.encryption.key.provider.uri + + The KeyProvider to use when interacting with encryption keys used when reading and writing to an encryption zone. 
+ ** Selecting an encryption algorithm and codec *** hadoop.security.crypto.codec.classes.EXAMPLECIPHERSUITE diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java index adeabfe8569c8..1c870a2801d92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java @@ -66,7 +66,7 @@ public void setUp() throws Exception { tmpDir = new File(System.getProperty("test.build.data", "target"), UUID.randomUUID().toString()).getAbsoluteFile(); final Path jksPath = new Path(tmpDir.toString(), "test.jks"); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri()); dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 9e442ef9ba016..7be6a49012254 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -24,6 +24,8 @@ import com.google.common.base.Supplier; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -66,6 +68,7 @@ .ConfiguredFailoverProxyProvider; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; +import org.apache.hadoop.hdfs.tools.DFSAdmin; import 
org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.net.NetUtils; @@ -75,6 +78,8 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.VersionInfo; import org.junit.Assume; @@ -88,8 +93,7 @@ import java.util.*; import java.util.concurrent.TimeoutException; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -872,6 +876,37 @@ public static void setFederatedConfiguration(MiniDFSCluster cluster, conf.set(DFSConfigKeys.DFS_NAMESERVICES, Joiner.on(",") .join(nameservices)); } + + public static void setFederatedHAConfiguration(MiniDFSCluster cluster, + Configuration conf) { + Map> nameservices = Maps.newHashMap(); + for (NameNodeInfo info : cluster.getNameNodeInfos()) { + Preconditions.checkState(info.nameserviceId != null); + List nns = nameservices.get(info.nameserviceId); + if (nns == null) { + nns = Lists.newArrayList(); + nameservices.put(info.nameserviceId, nns); + } + nns.add(info.nnId); + + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + info.nameserviceId, info.nnId), + DFSUtil.createUri(HdfsConstants.HDFS_URI_SCHEME, + info.nameNode.getNameNodeAddress()).toString()); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + info.nameserviceId, info.nnId), + DFSUtil.createUri(HdfsConstants.HDFS_URI_SCHEME, + info.nameNode.getNameNodeAddress()).toString()); + } + for (Map.Entry> entry : nameservices.entrySet()) { + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, + 
entry.getKey()), Joiner.on(",").join(entry.getValue())); + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + entry + .getKey(), ConfiguredFailoverProxyProvider.class.getName()); + } + conf.set(DFSConfigKeys.DFS_NAMESERVICES, Joiner.on(",") + .join(nameservices.keySet())); + } private static DatanodeID getDatanodeID(String ipAddr) { return new DatanodeID(ipAddr, "localhost", @@ -955,9 +990,14 @@ public static DatanodeStorageInfo[] createDatanodeStorageInfos(String[] racks, S public static DatanodeStorageInfo[] createDatanodeStorageInfos(int n) { return createDatanodeStorageInfos(n, null, null); } - + public static DatanodeStorageInfo[] createDatanodeStorageInfos( int n, String[] racks, String[] hostnames) { + return createDatanodeStorageInfos(n, racks, hostnames, null); + } + + public static DatanodeStorageInfo[] createDatanodeStorageInfos( + int n, String[] racks, String[] hostnames, StorageType[] types) { DatanodeStorageInfo[] storages = new DatanodeStorageInfo[n]; for(int i = storages.length; i > 0; ) { final String storageID = "s" + i; @@ -965,16 +1005,30 @@ public static DatanodeStorageInfo[] createDatanodeStorageInfos( i--; final String rack = (racks!=null && i < racks.length)? racks[i]: "defaultRack"; final String hostname = (hostnames!=null && i < hostnames.length)? hostnames[i]: "host"; - storages[i] = createDatanodeStorageInfo(storageID, ip, rack, hostname); + final StorageType type = (types != null && i < types.length) ? 
types[i] + : StorageType.DEFAULT; + storages[i] = createDatanodeStorageInfo(storageID, ip, rack, hostname, + type); } return storages; } + public static DatanodeStorageInfo createDatanodeStorageInfo( String storageID, String ip, String rack, String hostname) { - final DatanodeStorage storage = new DatanodeStorage(storageID); - final DatanodeDescriptor dn = BlockManagerTestUtil.getDatanodeDescriptor(ip, rack, storage, hostname); + return createDatanodeStorageInfo(storageID, ip, rack, hostname, + StorageType.DEFAULT); + } + + public static DatanodeStorageInfo createDatanodeStorageInfo( + String storageID, String ip, String rack, String hostname, + StorageType type) { + final DatanodeStorage storage = new DatanodeStorage(storageID, + DatanodeStorage.State.NORMAL, type); + final DatanodeDescriptor dn = BlockManagerTestUtil.getDatanodeDescriptor( + ip, rack, storage, hostname); return BlockManagerTestUtil.newDatanodeStorageInfo(dn, storage); } + public static DatanodeDescriptor[] toDatanodeDescriptor( DatanodeStorageInfo[] storages) { DatanodeDescriptor[] datanodes = new DatanodeDescriptor[storages.length]; @@ -1061,6 +1115,8 @@ public static void runOperations(MiniDFSCluster cluster, FSDataOutputStream s = filesystem.create(pathFileCreate); // OP_CLOSE 9 s.close(); + // OP_SET_STORAGE_POLICY 45 + filesystem.setStoragePolicy(pathFileCreate, "HOT"); // OP_RENAME_OLD 1 final Path pathFileMoved = new Path("/file_moved"); filesystem.rename(pathFileCreate, pathFileMoved); @@ -1421,6 +1477,57 @@ public static DatanodeDescriptor getExpectedPrimaryNode(NameNode nn, return expectedPrimary.getDatanodeDescriptor(); } + public static void toolRun(Tool tool, String cmd, int retcode, String contain) + throws Exception { + String [] cmds = StringUtils.split(cmd, ' '); + System.out.flush(); + System.err.flush(); + PrintStream origOut = System.out; + PrintStream origErr = System.err; + String output = null; + int ret = 0; + try { + ByteArrayOutputStream bs = new 
ByteArrayOutputStream(1024); + PrintStream out = new PrintStream(bs); + System.setOut(out); + System.setErr(out); + ret = tool.run(cmds); + System.out.flush(); + System.err.flush(); + out.close(); + output = bs.toString(); + } finally { + System.setOut(origOut); + System.setErr(origErr); + } + System.out.println("Output for command: " + cmd + " retcode: " + ret); + if (output != null) { + System.out.println(output); + } + assertEquals(retcode, ret); + if (contain != null) { + assertTrue("The real output is: " + output + ".\n It should contain: " + + contain, output.contains(contain)); + } + } + + public static void FsShellRun(String cmd, int retcode, String contain, + Configuration conf) throws Exception { + FsShell shell = new FsShell(new Configuration(conf)); + toolRun(shell, cmd, retcode, contain); + } + + public static void DFSAdminRun(String cmd, int retcode, String contain, + Configuration conf) throws Exception { + DFSAdmin admin = new DFSAdmin(new Configuration(conf)); + toolRun(admin, cmd, retcode, contain); + } + + public static void FsShellRun(String cmd, Configuration conf) + throws Exception { + FsShellRun(cmd, 0, null, conf); + } + public static void addDataNodeLayoutVersion(final int lv, final String description) throws NoSuchFieldException, IllegalAccessException { Preconditions.checkState(lv < DataNodeLayoutVersion.CURRENT_LAYOUT_VERSION); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 0e49cfec0537f..0512b7f43651a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1474,19 +1474,21 @@ public synchronized void startDataNodes(Configuration conf, int numDataNodes, secureResources, dn.getIpcPort())); dns[i - curDatanodesNum] = dn; } - curDatanodesNum += 
numDataNodes; this.numDataNodes += numDataNodes; waitActive(); - + if (storageCapacities != null) { for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; ++i) { - List volumes = dns[i].getFSDataset().getVolumes(); - assert storageCapacities[i].length == storagesPerDatanode; + final int index = i - curDatanodesNum; + List volumes = dns[index].getFSDataset().getVolumes(); + assert storageCapacities[index].length == storagesPerDatanode; assert volumes.size() == storagesPerDatanode; for (int j = 0; j < volumes.size(); ++j) { FsVolumeImpl volume = (FsVolumeImpl) volumes.get(j); - volume.setCapacityForTesting(storageCapacities[i][j]); + LOG.info("setCapacityForTesting " + storageCapacities[index][j] + + " for [" + volume.getStorageType() + "]" + volume.getStorageID()); + volume.setCapacityForTesting(storageCapacities[index][j]); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java new file mode 100644 index 0000000000000..158c22547cb41 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java @@ -0,0 +1,1075 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import static org.apache.hadoop.hdfs.BlockStoragePolicy.ID_UNSPECIFIED; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + +import com.google.common.collect.Lists; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.*; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.server.blockmanagement.*; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.net.Node; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.PathUtils; +import org.junit.Assert; +import org.junit.Test; + +/** Test {@link BlockStoragePolicy} */ +public class TestBlockStoragePolicy { + public static final BlockStoragePolicy.Suite POLICY_SUITE; + public static final BlockStoragePolicy DEFAULT_STORAGE_POLICY; + public static final Configuration conf; + + static { + conf = new HdfsConfiguration(); + conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1); + POLICY_SUITE = BlockStoragePolicy.readBlockStorageSuite(conf); + DEFAULT_STORAGE_POLICY = POLICY_SUITE.getDefaultPolicy(); + } + + static final EnumSet none = EnumSet.noneOf(StorageType.class); + static final EnumSet archive = EnumSet.of(StorageType.ARCHIVE); + static final EnumSet disk = EnumSet.of(StorageType.DISK); + static final EnumSet both = EnumSet.of(StorageType.DISK, StorageType.ARCHIVE); + 
+ static final long FILE_LEN = 1024; + static final short REPLICATION = 3; + + static final byte COLD = (byte) 4; + static final byte WARM = (byte) 8; + static final byte HOT = (byte) 12; + + @Test + public void testDefaultPolicies() { + final Map expectedPolicyStrings = new HashMap(); + expectedPolicyStrings.put(COLD, + "BlockStoragePolicy{COLD:4, storageTypes=[ARCHIVE], creationFallbacks=[], replicationFallbacks=[]"); + expectedPolicyStrings.put(WARM, + "BlockStoragePolicy{WARM:8, storageTypes=[DISK, ARCHIVE], creationFallbacks=[DISK, ARCHIVE], replicationFallbacks=[DISK, ARCHIVE]"); + expectedPolicyStrings.put(HOT, + "BlockStoragePolicy{HOT:12, storageTypes=[DISK], creationFallbacks=[], replicationFallbacks=[ARCHIVE]"); + + for(byte i = 1; i < 16; i++) { + final BlockStoragePolicy policy = POLICY_SUITE.getPolicy(i); + if (policy != null) { + final String s = policy.toString(); + Assert.assertEquals(expectedPolicyStrings.get(i), s); + } + } + Assert.assertEquals(POLICY_SUITE.getPolicy(HOT), POLICY_SUITE.getDefaultPolicy()); + + { // check Cold policy + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + for(short replication = 1; replication < 6; replication++) { + final List computed = cold.chooseStorageTypes(replication); + assertStorageType(computed, replication, StorageType.ARCHIVE); + } + assertCreationFallback(cold, null, null, null); + assertReplicationFallback(cold, null, null, null); + } + + { // check Warm policy + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + for(short replication = 1; replication < 6; replication++) { + final List computed = warm.chooseStorageTypes(replication); + assertStorageType(computed, replication, StorageType.DISK, StorageType.ARCHIVE); + } + assertCreationFallback(warm, StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + assertReplicationFallback(warm, StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + } + + { // check Hot policy + final BlockStoragePolicy hot = 
POLICY_SUITE.getPolicy(HOT); + for(short replication = 1; replication < 6; replication++) { + final List computed = hot.chooseStorageTypes(replication); + assertStorageType(computed, replication, StorageType.DISK); + } + assertCreationFallback(hot, null, null, null); + assertReplicationFallback(hot, StorageType.ARCHIVE, null, StorageType.ARCHIVE); + } + } + + static StorageType[] newStorageTypes(int nDisk, int nArchive) { + final StorageType[] t = new StorageType[nDisk + nArchive]; + Arrays.fill(t, 0, nDisk, StorageType.DISK); + Arrays.fill(t, nDisk, t.length, StorageType.ARCHIVE); + return t; + } + + static List asList(int nDisk, int nArchive) { + return Arrays.asList(newStorageTypes(nDisk, nArchive)); + } + + static void assertStorageType(List computed, short replication, + StorageType... answers) { + Assert.assertEquals(replication, computed.size()); + final StorageType last = answers[answers.length - 1]; + for(int i = 0; i < computed.size(); i++) { + final StorageType expected = i < answers.length? 
answers[i]: last; + Assert.assertEquals(expected, computed.get(i)); + } + } + + static void assertCreationFallback(BlockStoragePolicy policy, StorageType noneExpected, + StorageType archiveExpected, StorageType diskExpected) { + Assert.assertEquals(noneExpected, policy.getCreationFallback(none)); + Assert.assertEquals(archiveExpected, policy.getCreationFallback(archive)); + Assert.assertEquals(diskExpected, policy.getCreationFallback(disk)); + Assert.assertEquals(null, policy.getCreationFallback(both)); + } + + static void assertReplicationFallback(BlockStoragePolicy policy, StorageType noneExpected, + StorageType archiveExpected, StorageType diskExpected) { + Assert.assertEquals(noneExpected, policy.getReplicationFallback(none)); + Assert.assertEquals(archiveExpected, policy.getReplicationFallback(archive)); + Assert.assertEquals(diskExpected, policy.getReplicationFallback(disk)); + Assert.assertEquals(null, policy.getReplicationFallback(both)); + } + + private static interface CheckChooseStorageTypes { + public void checkChooseStorageTypes(BlockStoragePolicy p, short replication, + List chosen, StorageType... expected); + + /** Basic case: pass only replication and chosen */ + static final CheckChooseStorageTypes Basic = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, short replication, + List chosen, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, chosen); + assertStorageTypes(types, expected); + } + }; + + /** With empty unavailables and isNewBlock=true */ + static final CheckChooseStorageTypes EmptyUnavailablesAndNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... 
expected) { + final List types = p.chooseStorageTypes(replication, + chosen, none, true); + assertStorageTypes(types, expected); + } + }; + + /** With empty unavailables and isNewBlock=false */ + static final CheckChooseStorageTypes EmptyUnavailablesAndNonNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, + chosen, none, false); + assertStorageTypes(types, expected); + } + }; + + /** With both DISK and ARCHIVE unavailables and isNewBlock=true */ + static final CheckChooseStorageTypes BothUnavailableAndNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, + chosen, both, true); + assertStorageTypes(types, expected); + } + }; + + /** With both DISK and ARCHIVE unavailable and isNewBlock=false */ + static final CheckChooseStorageTypes BothUnavailableAndNonNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, + chosen, both, false); + assertStorageTypes(types, expected); + } + }; + + /** With ARCHIVE unavailable and isNewBlock=true */ + static final CheckChooseStorageTypes ArchivalUnavailableAndNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... 
expected) { + final List types = p.chooseStorageTypes(replication, + chosen, archive, true); + assertStorageTypes(types, expected); + } + }; + + /** With ARCHIVE unavailable and isNewBlock=true */ + static final CheckChooseStorageTypes ArchivalUnavailableAndNonNewBlock + = new CheckChooseStorageTypes() { + @Override + public void checkChooseStorageTypes(BlockStoragePolicy p, + short replication, List chosen, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, + chosen, archive, false); + assertStorageTypes(types, expected); + } + }; + } + + @Test + public void testChooseStorageTypes() { + run(CheckChooseStorageTypes.Basic); + run(CheckChooseStorageTypes.EmptyUnavailablesAndNewBlock); + run(CheckChooseStorageTypes.EmptyUnavailablesAndNonNewBlock); + } + + private static void run(CheckChooseStorageTypes method) { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + { + final List chosen = Lists.newArrayList(); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, 
replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK, StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + 
StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.ARCHIVE); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen); + method.checkChooseStorageTypes(cold, replication, chosen, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + } + + @Test + public void testChooseStorageTypesWithBothUnavailable() { + runWithBothUnavailable(CheckChooseStorageTypes.BothUnavailableAndNewBlock); + runWithBothUnavailable(CheckChooseStorageTypes.BothUnavailableAndNonNewBlock); + } + + private static void runWithBothUnavailable(CheckChooseStorageTypes method) { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + for(int n = 0; n <= 3; n++) { + for(int d = 0; d <= n; d++) { + final int a = n - d; + final List chosen = asList(d, a); + method.checkChooseStorageTypes(hot, replication, chosen); + method.checkChooseStorageTypes(warm, replication, chosen); + method.checkChooseStorageTypes(cold, replication, chosen); + } + } + } + + @Test + public void testChooseStorageTypesWithDiskUnavailableAndNewBlock() { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = 
POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + final EnumSet unavailables = disk; + final boolean isNewBlock = true; + { + final List chosen = Lists.newArrayList(); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.DISK); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, 
unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.DISK); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + 
checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock); + } + } + + @Test + public void testChooseStorageTypesWithArchiveUnavailable() { + runWithArchiveUnavailable(CheckChooseStorageTypes.ArchivalUnavailableAndNewBlock); + runWithArchiveUnavailable(CheckChooseStorageTypes.ArchivalUnavailableAndNonNewBlock); + } + + private static void runWithArchiveUnavailable(CheckChooseStorageTypes method) { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + { + final List chosen = Lists.newArrayList(); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList(StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList(StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK); + 
method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(hot, replication, chosen); + method.checkChooseStorageTypes(warm, replication, chosen); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK); + method.checkChooseStorageTypes(warm, replication, chosen); + method.checkChooseStorageTypes(cold, replication, chosen); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + method.checkChooseStorageTypes(hot, replication, chosen, + StorageType.DISK, StorageType.DISK, StorageType.DISK); + 
method.checkChooseStorageTypes(warm, replication, chosen, + StorageType.DISK); + method.checkChooseStorageTypes(cold, replication, chosen); + } + } + + @Test + public void testChooseStorageTypesWithDiskUnavailableAndNonNewBlock() { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + final EnumSet unavailables = disk; + final boolean isNewBlock = false; + { + final List chosen = Lists.newArrayList(); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.DISK); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList(StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK); + 
checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.DISK); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + checkChooseStorageTypes(cold, replication, chosen, unavailables, 
isNewBlock, + StorageType.ARCHIVE, StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock, + StorageType.ARCHIVE); + } + + { + final List chosen = Arrays.asList( + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE); + checkChooseStorageTypes(hot, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(warm, replication, chosen, unavailables, isNewBlock); + checkChooseStorageTypes(cold, replication, chosen, unavailables, isNewBlock); + } + } + + static void checkChooseStorageTypes(BlockStoragePolicy p, short replication, + List chosen, EnumSet unavailables, + boolean isNewBlock, StorageType... expected) { + final List types = p.chooseStorageTypes(replication, chosen, + unavailables, isNewBlock); + assertStorageTypes(types, expected); + } + + static void assertStorageTypes(List computed, StorageType... expected) { + assertStorageTypes(computed.toArray(StorageType.EMPTY_ARRAY), expected); + } + + static void assertStorageTypes(StorageType[] computed, StorageType... 
expected) { + Arrays.sort(expected); + Arrays.sort(computed); + Assert.assertArrayEquals(expected, computed); + } + + @Test + public void testChooseExcess() { + final BlockStoragePolicy hot = POLICY_SUITE.getPolicy(HOT); + final BlockStoragePolicy warm = POLICY_SUITE.getPolicy(WARM); + final BlockStoragePolicy cold = POLICY_SUITE.getPolicy(COLD); + + final short replication = 3; + for(int n = 0; n <= 6; n++) { + for(int d = 0; d <= n; d++) { + final int a = n - d; + final List chosen = asList(d, a); + { + final int nDisk = Math.max(0, d - replication); + final int nArchive = a; + final StorageType[] expected = newStorageTypes(nDisk, nArchive); + checkChooseExcess(hot, replication, chosen, expected); + } + + { + final int nDisk = Math.max(0, d - 1); + final int nArchive = Math.max(0, a - replication + 1); + final StorageType[] expected = newStorageTypes(nDisk, nArchive); + checkChooseExcess(warm, replication, chosen, expected); + } + + { + final int nDisk = d; + final int nArchive = Math.max(0, a - replication ); + final StorageType[] expected = newStorageTypes(nDisk, nArchive); + checkChooseExcess(cold, replication, chosen, expected); + } + } + } + } + + static void checkChooseExcess(BlockStoragePolicy p, short replication, + List chosen, StorageType... expected) { + final List types = p.chooseExcess(replication, chosen); + assertStorageTypes(types, expected); + } + + private void checkDirectoryListing(HdfsFileStatus[] stats, byte... 
policies) { + Assert.assertEquals(stats.length, policies.length); + for (int i = 0; i < stats.length; i++) { + Assert.assertEquals(stats[i].getStoragePolicy(), policies[i]); + } + } + + @Test + public void testSetStoragePolicy() throws Exception { + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(REPLICATION).build(); + cluster.waitActive(); + final DistributedFileSystem fs = cluster.getFileSystem(); + try { + final Path dir = new Path("/testSetStoragePolicy"); + final Path fooFile = new Path(dir, "foo"); + final Path barDir = new Path(dir, "bar"); + final Path barFile1= new Path(barDir, "f1"); + final Path barFile2= new Path(barDir, "f2"); + DFSTestUtil.createFile(fs, fooFile, FILE_LEN, REPLICATION, 0L); + DFSTestUtil.createFile(fs, barFile1, FILE_LEN, REPLICATION, 0L); + DFSTestUtil.createFile(fs, barFile2, FILE_LEN, REPLICATION, 0L); + + final String invalidPolicyName = "INVALID-POLICY"; + try { + fs.setStoragePolicy(fooFile, invalidPolicyName); + Assert.fail("Should throw a HadoopIllegalArgumentException"); + } catch (RemoteException e) { + GenericTestUtils.assertExceptionContains(invalidPolicyName, e); + } + + // check storage policy + HdfsFileStatus[] dirList = fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + HdfsFileStatus[] barList = fs.getClient().listPaths(barDir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(dirList, ID_UNSPECIFIED, ID_UNSPECIFIED); + checkDirectoryListing(barList, ID_UNSPECIFIED, ID_UNSPECIFIED); + + final Path invalidPath = new Path("/invalidPath"); + try { + fs.setStoragePolicy(invalidPath, "WARM"); + Assert.fail("Should throw a FileNotFoundException"); + } catch (FileNotFoundException e) { + GenericTestUtils.assertExceptionContains(invalidPath.toString(), e); + } + + fs.setStoragePolicy(fooFile, "COLD"); + fs.setStoragePolicy(barDir, "WARM"); + fs.setStoragePolicy(barFile2, "HOT"); + + dirList = 
fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + barList = fs.getClient().listPaths(barDir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + checkDirectoryListing(dirList, WARM, COLD); // bar is warm, foo is cold + checkDirectoryListing(barList, WARM, HOT); + + // restart namenode to make sure the editlog is correct + cluster.restartNameNode(true); + dirList = fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + barList = fs.getClient().listPaths(barDir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(dirList, WARM, COLD); // bar is warm, foo is cold + checkDirectoryListing(barList, WARM, HOT); + + // restart namenode with checkpoint to make sure the fsimage is correct + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + cluster.restartNameNode(true); + dirList = fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + barList = fs.getClient().listPaths(barDir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + checkDirectoryListing(dirList, WARM, COLD); // bar is warm, foo is cold + checkDirectoryListing(barList, WARM, HOT); + } finally { + cluster.shutdown(); + } + } + + @Test + public void testSetStoragePolicyWithSnapshot() throws Exception { + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(REPLICATION).build(); + cluster.waitActive(); + final DistributedFileSystem fs = cluster.getFileSystem(); + try { + final Path dir = new Path("/testSetStoragePolicyWithSnapshot"); + final Path fooDir = new Path(dir, "foo"); + final Path fooFile1= new Path(fooDir, "f1"); + final Path fooFile2= new Path(fooDir, "f2"); + DFSTestUtil.createFile(fs, fooFile1, FILE_LEN, REPLICATION, 0L); + DFSTestUtil.createFile(fs, fooFile2, FILE_LEN, REPLICATION, 0L); + + fs.setStoragePolicy(fooDir, 
"WARM"); + + HdfsFileStatus[] dirList = fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(dirList, WARM); + HdfsFileStatus[] fooList = fs.getClient().listPaths(fooDir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(fooList, WARM, WARM); + + // take snapshot + SnapshotTestHelper.createSnapshot(fs, dir, "s1"); + // change the storage policy of fooFile1 + fs.setStoragePolicy(fooFile1, "COLD"); + + fooList = fs.getClient().listPaths(fooDir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + checkDirectoryListing(fooList, COLD, WARM); + + // check the policy for /dir/.snapshot/s1/foo/f1. Note we always return + // the latest storage policy for a file/directory. + Path s1f1 = SnapshotTestHelper.getSnapshotPath(dir, "s1", "foo/f1"); + DirectoryListing f1Listing = fs.getClient().listPaths(s1f1.toString(), + HdfsFileStatus.EMPTY_NAME); + checkDirectoryListing(f1Listing.getPartialListing(), COLD); + + // delete f1 + fs.delete(fooFile1, true); + fooList = fs.getClient().listPaths(fooDir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + checkDirectoryListing(fooList, WARM); + // check the policy for /dir/.snapshot/s1/foo/f1 again after the deletion + checkDirectoryListing(fs.getClient().listPaths(s1f1.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(), COLD); + + // change the storage policy of foo dir + fs.setStoragePolicy(fooDir, "HOT"); + // /dir/foo is now hot + dirList = fs.getClient().listPaths(dir.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(dirList, HOT); + // /dir/foo/f2 is hot + fooList = fs.getClient().listPaths(fooDir.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(); + checkDirectoryListing(fooList, HOT); + + // check storage policy of snapshot path + Path s1 = SnapshotTestHelper.getSnapshotRoot(dir, "s1"); + Path s1foo = 
SnapshotTestHelper.getSnapshotPath(dir, "s1", "foo"); + checkDirectoryListing(fs.getClient().listPaths(s1.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(), HOT); + // /dir/.snapshot/s1/foo/f1 and /dir/.snapshot/s1/foo/f2 should still + // follow the latest + checkDirectoryListing(fs.getClient().listPaths(s1foo.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(), COLD, HOT); + + // delete foo + fs.delete(fooDir, true); + checkDirectoryListing(fs.getClient().listPaths(s1.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(), HOT); + checkDirectoryListing(fs.getClient().listPaths(s1foo.toString(), + HdfsFileStatus.EMPTY_NAME).getPartialListing(), COLD, HOT); + } finally { + cluster.shutdown(); + } + } + + private static StorageType[][] genStorageTypes(int numDataNodes) { + StorageType[][] types = new StorageType[numDataNodes][]; + for (int i = 0; i < types.length; i++) { + types[i] = new StorageType[]{StorageType.DISK, StorageType.ARCHIVE}; + } + return types; + } + + private void checkLocatedBlocks(HdfsLocatedFileStatus status, int blockNum, + int replicaNum, StorageType... 
types) { + List typeList = Lists.newArrayList(); + Collections.addAll(typeList, types); + LocatedBlocks lbs = status.getBlockLocations(); + Assert.assertEquals(blockNum, lbs.getLocatedBlocks().size()); + for (LocatedBlock lb : lbs.getLocatedBlocks()) { + Assert.assertEquals(replicaNum, lb.getStorageTypes().length); + for (StorageType type : lb.getStorageTypes()) { + Assert.assertTrue(typeList.remove(type)); + } + } + Assert.assertTrue(typeList.isEmpty()); + } + + private void testIncreaseFileRep(String policyName, byte policyId, + StorageType[] before, + StorageType[] after) throws Exception { + final int numDataNodes = 5; + final StorageType[][] types = genStorageTypes(numDataNodes); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numDataNodes).storageTypes(types).build(); + cluster.waitActive(); + final DistributedFileSystem fs = cluster.getFileSystem(); + try { + final Path dir = new Path("/test"); + fs.mkdirs(dir); + fs.setStoragePolicy(dir, policyName); + + final Path foo = new Path(dir, "foo"); + DFSTestUtil.createFile(fs, foo, FILE_LEN, REPLICATION, 0L); + + // the storage policy of foo should be policyId, and the replicas + // should be stored on the expected 'before' storage types + HdfsFileStatus[] status = fs.getClient().listPaths(foo.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(status, policyId); + HdfsLocatedFileStatus fooStatus = (HdfsLocatedFileStatus) status[0]; + checkLocatedBlocks(fooStatus, 1, 3, before); + + // change the replication factor to 5 + fs.setReplication(foo, (short) numDataNodes); + Thread.sleep(1000); + for (DataNode dn : cluster.getDataNodes()) { + DataNodeTestUtils.triggerHeartbeat(dn); + } + Thread.sleep(1000); + status = fs.getClient().listPaths(foo.toString(), + HdfsFileStatus.EMPTY_NAME, true).getPartialListing(); + checkDirectoryListing(status, policyId); + fooStatus = (HdfsLocatedFileStatus) status[0]; + checkLocatedBlocks(fooStatus, 1, 5, after); + } finally { + 
cluster.shutdown(); + } + } + + /** + * Consider a File with Hot storage policy. Increase replication factor of + * that file from 3 to 5. Make sure all replicas are created in DISK. + */ + @Test + public void testIncreaseHotFileRep() throws Exception { + testIncreaseFileRep("HOT", HOT, new StorageType[]{StorageType.DISK, + StorageType.DISK, StorageType.DISK}, + new StorageType[]{StorageType.DISK, StorageType.DISK, + StorageType.DISK, StorageType.DISK, StorageType.DISK}); + } + + /** + * Consider a File with Warm temperature. Increase replication factor of + * that file from 3 to 5. Make sure all replicas are created in DISK + * and ARCHIVE. + */ + @Test + public void testIncreaseWarmRep() throws Exception { + testIncreaseFileRep("WARM", WARM, new StorageType[]{StorageType.DISK, + StorageType.ARCHIVE, StorageType.ARCHIVE}, + new StorageType[]{StorageType.DISK, StorageType.ARCHIVE, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE}); + } + + /** + * Consider a File with Cold temperature. Increase replication factor of + * that file from 3 to 5. Make sure all replicas are created in ARCHIVE. 
+ */ + @Test + public void testIncreaseColdRep() throws Exception { + testIncreaseFileRep("COLD", COLD, new StorageType[]{StorageType.ARCHIVE, + StorageType.ARCHIVE, StorageType.ARCHIVE}, + new StorageType[]{StorageType.ARCHIVE, StorageType.ARCHIVE, + StorageType.ARCHIVE, StorageType.ARCHIVE, StorageType.ARCHIVE}); + } + + @Test + public void testChooseTargetWithTopology() throws Exception { + BlockStoragePolicy policy1 = new BlockStoragePolicy((byte) 9, "TEST1", + new StorageType[]{StorageType.SSD, StorageType.DISK, + StorageType.ARCHIVE}, new StorageType[]{}, new StorageType[]{}); + BlockStoragePolicy policy2 = new BlockStoragePolicy((byte) 11, "TEST2", + new StorageType[]{StorageType.DISK, StorageType.SSD, + StorageType.ARCHIVE}, new StorageType[]{}, new StorageType[]{}); + + final String[] racks = {"/d1/r1", "/d1/r2", "/d1/r2"}; + final String[] hosts = {"host1", "host2", "host3"}; + final StorageType[] types = {StorageType.DISK, StorageType.SSD, + StorageType.ARCHIVE}; + + final DatanodeStorageInfo[] storages = DFSTestUtil + .createDatanodeStorageInfos(3, racks, hosts, types); + final DatanodeDescriptor[] dataNodes = DFSTestUtil + .toDatanodeDescriptor(storages); + + FileSystem.setDefaultUri(conf, "hdfs://localhost:0"); + conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0"); + File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class); + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, + new File(baseDir, "name").getPath()); + DFSTestUtil.formatNameNode(conf); + NameNode namenode = new NameNode(conf); + + final BlockManager bm = namenode.getNamesystem().getBlockManager(); + BlockPlacementPolicy replicator = bm.getBlockPlacementPolicy(); + NetworkTopology cluster = bm.getDatanodeManager().getNetworkTopology(); + for (DatanodeDescriptor datanode : dataNodes) { + cluster.add(datanode); + } + + DatanodeStorageInfo[] targets = replicator.chooseTarget("/foo", 3, + dataNodes[0], Collections.emptyList(), false, + new HashSet(), 0, policy1); + 
System.out.println(Arrays.asList(targets)); + Assert.assertEquals(3, targets.length); + targets = replicator.chooseTarget("/foo", 3, + dataNodes[0], Collections.emptyList(), false, + new HashSet(), 0, policy2); + System.out.println(Arrays.asList(targets)); + Assert.assertEquals(3, targets.length); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java index 74daccc9e2d51..5659e92aa716e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java @@ -255,12 +255,12 @@ public Object answer(InvocationOnMock invocation) Mockito.doReturn( new HdfsFileStatus(0, false, 1, 1024, 0, 0, new FsPermission( (short) 777), "owner", "group", new byte[0], new byte[0], - 1010, 0, null)).when(mockNN).getFileInfo(anyString()); + 1010, 0, null, (byte) 0)).when(mockNN).getFileInfo(anyString()); Mockito.doReturn( new HdfsFileStatus(0, false, 1, 1024, 0, 0, new FsPermission( (short) 777), "owner", "group", new byte[0], new byte[0], - 1010, 0, null)) + 1010, 0, null, (byte) 0)) .when(mockNN) .create(anyString(), (FsPermission) anyObject(), anyString(), (EnumSetWritable) anyObject(), anyBoolean(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java index 45b588b93a2df..a608ba836065d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSInotifyEventInputStream.java @@ -64,7 +64,7 @@ private static Event waitForNextEvent(DFSInotifyEventInputStream eis) */ @Test public void 
testOpcodeCount() { - Assert.assertTrue(FSEditLogOpCodes.values().length == 46); + Assert.assertTrue(FSEditLogOpCodes.values().length == 47); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java index 96f5fcee07d39..b80fd45e44380 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java @@ -25,7 +25,9 @@ import java.io.RandomAccessFile; import java.io.StringReader; import java.io.StringWriter; +import java.net.URI; import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.Callable; @@ -47,6 +49,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContextTestWrapper; import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.FileSystemTestWrapper; @@ -113,8 +116,8 @@ public class TestEncryptionZones { protected FileContextTestWrapper fcWrapper; protected String getKeyProviderURI() { - return JavaKeyStoreProvider.SCHEME_NAME + "://file" + testRootDir + - "/test.jks"; + return JavaKeyStoreProvider.SCHEME_NAME + "://file" + + new Path(testRootDir.toString(), "test.jks").toUri(); } @Before @@ -124,7 +127,7 @@ public void setup() throws Exception { // Set up java key store String testRoot = fsHelper.getTestRootDir(); testRootDir = new File(testRoot).getAbsoluteFile(); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, getKeyProviderURI()); + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, getKeyProviderURI()); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); // Lower the 
batch size for testing conf.setInt(DFSConfigKeys.DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES, @@ -669,7 +672,8 @@ public void testCipherSuiteNegotiation() throws Exception { // Check KeyProvider state // Flushing the KP on the NN, since it caches, and init a test one cluster.getNamesystem().getProvider().flush(); - KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0); + KeyProvider provider = KeyProviderFactory + .get(new URI(conf.get(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI)), conf); List keys = provider.getKeys(); assertEquals("Expected NN to have created one key per zone", 1, keys.size()); @@ -693,7 +697,7 @@ public void testCipherSuiteNegotiation() throws Exception { public void testCreateEZWithNoProvider() throws Exception { // Unset the key provider and make sure EZ ops don't work final Configuration clusterConf = cluster.getConfiguration(0); - clusterConf.set(KeyProviderFactory.KEY_PROVIDER_PATH, ""); + clusterConf.unset(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI); cluster.restartNameNode(true); cluster.waitActive(); final Path zone1 = new Path("/zone1"); @@ -705,13 +709,100 @@ public void testCreateEZWithNoProvider() throws Exception { assertExceptionContains("since no key provider is available", e); } final Path jksPath = new Path(testRootDir.toString(), "test.jks"); - clusterConf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + clusterConf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri() ); // Try listing EZs as well assertNumZones(0); } + @Test(timeout = 120000) + public void testIsEncryptedMethod() throws Exception { + doTestIsEncryptedMethod(new Path("/")); + doTestIsEncryptedMethod(new Path("/.reserved/raw")); + } + + private void doTestIsEncryptedMethod(Path prefix) throws Exception { + try { + dTIEM(prefix); + } finally { + for (FileStatus s : fsWrapper.listStatus(prefix)) { + fsWrapper.delete(s.getPath(), true); + } + } + } + + private void dTIEM(Path prefix) 
throws Exception { + final HdfsAdmin dfsAdmin = + new HdfsAdmin(FileSystem.getDefaultUri(conf), conf); + // Create an unencrypted file to check isEncrypted returns false + final Path baseFile = new Path(prefix, "base"); + fsWrapper.createFile(baseFile); + FileStatus stat = fsWrapper.getFileStatus(baseFile); + assertFalse("Expected isEncrypted to return false for " + baseFile, + stat.isEncrypted()); + + // Create an encrypted file to check isEncrypted returns true + final Path zone = new Path(prefix, "zone"); + fsWrapper.mkdir(zone, FsPermission.getDirDefault(), true); + dfsAdmin.createEncryptionZone(zone, TEST_KEY); + final Path encFile = new Path(zone, "encfile"); + fsWrapper.createFile(encFile); + stat = fsWrapper.getFileStatus(encFile); + assertTrue("Expected isEncrypted to return true for enc file" + encFile, + stat.isEncrypted()); + + // check that it returns true for an ez root + stat = fsWrapper.getFileStatus(zone); + assertTrue("Expected isEncrypted to return true for ezroot", + stat.isEncrypted()); + + // check that it returns true for a dir in the ez + final Path zoneSubdir = new Path(zone, "subdir"); + fsWrapper.mkdir(zoneSubdir, FsPermission.getDirDefault(), true); + stat = fsWrapper.getFileStatus(zoneSubdir); + assertTrue( + "Expected isEncrypted to return true for ez subdir " + zoneSubdir, + stat.isEncrypted()); + + // check that it returns false for a non ez dir + final Path nonEzDirPath = new Path(prefix, "nonzone"); + fsWrapper.mkdir(nonEzDirPath, FsPermission.getDirDefault(), true); + stat = fsWrapper.getFileStatus(nonEzDirPath); + assertFalse( + "Expected isEncrypted to return false for directory " + nonEzDirPath, + stat.isEncrypted()); + + // check that it returns true for listings within an ez + FileStatus[] statuses = fsWrapper.listStatus(zone); + for (FileStatus s : statuses) { + assertTrue("Expected isEncrypted to return true for ez stat " + zone, + s.isEncrypted()); + } + + statuses = fsWrapper.listStatus(encFile); + for (FileStatus s : 
statuses) { + assertTrue( + "Expected isEncrypted to return true for ez file stat " + encFile, + s.isEncrypted()); + } + + // check that it returns false for listings outside an ez + statuses = fsWrapper.listStatus(nonEzDirPath); + for (FileStatus s : statuses) { + assertFalse( + "Expected isEncrypted to return false for nonez stat " + nonEzDirPath, + s.isEncrypted()); + } + + statuses = fsWrapper.listStatus(baseFile); + for (FileStatus s : statuses) { + assertFalse( + "Expected isEncrypted to return false for non ez stat " + baseFile, + s.isEncrypted()); + } + } + private class MyInjector extends EncryptionFaultInjector { int generateCount; CountDownLatch ready; @@ -849,7 +940,7 @@ public void doCleanup() throws Exception { Future future = executor.submit(new CreateFileTask(fsWrapper, file)); // Flip-flop between two EZs to repeatedly fail - for (int i=0; i<10; i++) { + for (int i=0; i listZones = Lists.newArrayList(); + RemoteIterator it = dfsAdmin.listEncryptionZones(); + while (it.hasNext()) { + listZones.add(it.next()); + } + for (EncryptionZone z: listZones) { + System.out.println(z); + } + assertEquals("Did not expect additional encryption zones!", 1, + listZones.size()); + EncryptionZone listZone = listZones.get(0); + assertEquals("Got unexpected ez path", zone.toString(), + listZone.getPath().toString()); + assertEquals("Unexpected ez key", TEST_KEY2, listZone.getKeyName()); // Verify contents of the snapshotted file final Path snapshottedZoneFile = new Path( @@ -975,7 +1094,8 @@ public void testSnapshotsOnEncryptionZones() throws Exception { assertEquals("Contents of snapshotted file have changed unexpectedly", contents, DFSTestUtil.readFile(fs, snapshottedZoneFile)); - // Now delete the snapshots out of order and verify the zones are still correct + // Now delete the snapshots out of order and verify the zones are still + // correct fs.deleteSnapshot(zoneParent, snap2.getName()); assertEquals("Got unexpected ez path", zone.toString(), 
dfsAdmin.getEncryptionZoneForPath(snap1Zone).getPath().toString()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java index b6040045ac435..c74f99063ece5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java @@ -20,7 +20,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; @@ -60,7 +59,7 @@ public void setupCluster() throws Exception { fsHelper = new FileSystemTestHelper(); String testRoot = fsHelper.getTestRootDir(); testRootDir = new File(testRoot).getAbsoluteFile(); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + testRootDir + "/test.jks" ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java index 28c253fd157f9..6119b6e09b726 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLease.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hdfs; +import static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.anyList; import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.anyShort; import static org.mockito.Matchers.anyLong; -import 
static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.anyObject; +import static org.mockito.Matchers.anyShort; +import static org.mockito.Matchers.anyString; import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.spy; @@ -38,7 +40,6 @@ import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; @@ -342,12 +343,12 @@ public void testFactory() throws Exception { Mockito.doReturn( new HdfsFileStatus(0, false, 1, 1024, 0, 0, new FsPermission( (short) 777), "owner", "group", new byte[0], new byte[0], - 1010, 0, null)).when(mcp).getFileInfo(anyString()); + 1010, 0, null, (byte) 0)).when(mcp).getFileInfo(anyString()); Mockito .doReturn( new HdfsFileStatus(0, false, 1, 1024, 0, 0, new FsPermission( (short) 777), "owner", "group", new byte[0], new byte[0], - 1010, 0, null)) + 1010, 0, null, (byte) 0)) .when(mcp) .create(anyString(), (FsPermission) anyObject(), anyString(), (EnumSetWritable) anyObject(), anyBoolean(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java index 20e4f4edf2702..cc497ac63b76d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java @@ -24,7 +24,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; -import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContextTestWrapper; import org.apache.hadoop.fs.FileStatus; @@ 
-70,7 +69,7 @@ public void setup() throws Exception { String testRoot = fsHelper.getTestRootDir(); File testRootDir = new File(testRoot).getAbsoluteFile(); final Path jksPath = new Path(testRootDir.toString(), "test.jks"); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri() ); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java index e2db636936d51..eec4e99da5e65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSnapshotCommands.java @@ -18,22 +18,11 @@ package org.apache.hadoop.hdfs; -import static org.junit.Assert.*; - -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.PrintStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; -import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.tools.DFSAdmin; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.Tool; - import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -89,136 +78,87 @@ public void tearDown() throws IOException { } } - private void toolRun(Tool tool, String cmd, int retcode, String contain) - throws Exception { - String [] cmds = StringUtils.split(cmd, ' '); - System.out.flush(); - System.err.flush(); - PrintStream origOut = System.out; - PrintStream origErr = System.err; - String output = null; - int ret = 0; - try { - ByteArrayOutputStream bs = new 
ByteArrayOutputStream(1024); - PrintStream out = new PrintStream(bs); - System.setOut(out); - System.setErr(out); - ret = tool.run(cmds); - System.out.flush(); - System.err.flush(); - out.close(); - output = bs.toString(); - } finally { - System.setOut(origOut); - System.setErr(origErr); - } - System.out.println("Output for command: " + cmd + " retcode: " + ret); - if (output != null) { - System.out.println(output); - } - assertEquals(retcode, ret); - if (contain != null) { - assertTrue(output.contains(contain)); - } - } - - private void FsShellRun(String cmd, int retcode, String contain) - throws Exception { - FsShell shell = new FsShell(new Configuration(conf)); - toolRun(shell, cmd, retcode, contain); - } - - private void DFSAdminRun(String cmd, int retcode, String contain) - throws Exception { - DFSAdmin admin = new DFSAdmin(new Configuration(conf)); - toolRun(admin, cmd, retcode, contain); - } - - private void FsShellRun(String cmd) throws Exception { - FsShellRun(cmd, 0, null); - } - @Test public void testAllowSnapshot() throws Exception { // Idempotent test - DFSAdminRun("-allowSnapshot /sub1", 0, "Allowing snaphot on /sub1 succeeded"); + DFSTestUtil.DFSAdminRun("-allowSnapshot /sub1", 0, "Allowing snaphot on /sub1 succeeded", conf); // allow normal dir success - FsShellRun("-mkdir /sub2"); - DFSAdminRun("-allowSnapshot /sub2", 0, "Allowing snaphot on /sub2 succeeded"); + DFSTestUtil.FsShellRun("-mkdir /sub2", conf); + DFSTestUtil.DFSAdminRun("-allowSnapshot /sub2", 0, "Allowing snaphot on /sub2 succeeded", conf); // allow non-exists dir failed - DFSAdminRun("-allowSnapshot /sub3", -1, null); + DFSTestUtil.DFSAdminRun("-allowSnapshot /sub3", -1, null, conf); } @Test public void testCreateSnapshot() throws Exception { // test createSnapshot - FsShellRun("-createSnapshot /sub1 sn0", 0, "Created snapshot /sub1/.snapshot/sn0"); - FsShellRun("-createSnapshot /sub1 sn0", 1, "there is already a snapshot with the same name \"sn0\""); - FsShellRun("-rmr 
/sub1/sub1sub2"); - FsShellRun("-mkdir /sub1/sub1sub3"); - FsShellRun("-createSnapshot /sub1 sn1", 0, "Created snapshot /sub1/.snapshot/sn1"); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn0", 0, "Created snapshot /sub1/.snapshot/sn0", conf); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn0", 1, "there is already a snapshot with the same name \"sn0\"", conf); + DFSTestUtil.FsShellRun("-rmr /sub1/sub1sub2", conf); + DFSTestUtil.FsShellRun("-mkdir /sub1/sub1sub3", conf); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn1", 0, "Created snapshot /sub1/.snapshot/sn1", conf); // check snapshot contents - FsShellRun("-ls /sub1", 0, "/sub1/sub1sub1"); - FsShellRun("-ls /sub1", 0, "/sub1/sub1sub3"); - FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn0"); - FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn1"); - FsShellRun("-ls /sub1/.snapshot/sn0", 0, "/sub1/.snapshot/sn0/sub1sub1"); - FsShellRun("-ls /sub1/.snapshot/sn0", 0, "/sub1/.snapshot/sn0/sub1sub2"); - FsShellRun("-ls /sub1/.snapshot/sn1", 0, "/sub1/.snapshot/sn1/sub1sub1"); - FsShellRun("-ls /sub1/.snapshot/sn1", 0, "/sub1/.snapshot/sn1/sub1sub3"); + DFSTestUtil.FsShellRun("-ls /sub1", 0, "/sub1/sub1sub1", conf); + DFSTestUtil.FsShellRun("-ls /sub1", 0, "/sub1/sub1sub3", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn0", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn1", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn0", 0, "/sub1/.snapshot/sn0/sub1sub1", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn0", 0, "/sub1/.snapshot/sn0/sub1sub2", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn1", 0, "/sub1/.snapshot/sn1/sub1sub1", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn1", 0, "/sub1/.snapshot/sn1/sub1sub3", conf); } @Test public void testMkdirUsingReservedName() throws Exception { // test can not create dir with reserved name: .snapshot - FsShellRun("-ls /"); - FsShellRun("-mkdir /.snapshot", 1, "File 
exists"); - FsShellRun("-mkdir /sub1/.snapshot", 1, "File exists"); + DFSTestUtil.FsShellRun("-ls /", conf); + DFSTestUtil.FsShellRun("-mkdir /.snapshot", 1, "File exists", conf); + DFSTestUtil.FsShellRun("-mkdir /sub1/.snapshot", 1, "File exists", conf); // mkdir -p ignore reserved name check if dir already exists - FsShellRun("-mkdir -p /sub1/.snapshot"); - FsShellRun("-mkdir -p /sub1/sub1sub1/.snapshot", 1, "mkdir: \".snapshot\" is a reserved name."); + DFSTestUtil.FsShellRun("-mkdir -p /sub1/.snapshot", conf); + DFSTestUtil.FsShellRun("-mkdir -p /sub1/sub1sub1/.snapshot", 1, "mkdir: \".snapshot\" is a reserved name.", conf); } @Test public void testRenameSnapshot() throws Exception { - FsShellRun("-createSnapshot /sub1 sn.orig"); - FsShellRun("-renameSnapshot /sub1 sn.orig sn.rename"); - FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn.rename"); - FsShellRun("-ls /sub1/.snapshot/sn.rename", 0, "/sub1/.snapshot/sn.rename/sub1sub1"); - FsShellRun("-ls /sub1/.snapshot/sn.rename", 0, "/sub1/.snapshot/sn.rename/sub1sub2"); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn.orig", conf); + DFSTestUtil.FsShellRun("-renameSnapshot /sub1 sn.orig sn.rename", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot", 0, "/sub1/.snapshot/sn.rename", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn.rename", 0, "/sub1/.snapshot/sn.rename/sub1sub1", conf); + DFSTestUtil.FsShellRun("-ls /sub1/.snapshot/sn.rename", 0, "/sub1/.snapshot/sn.rename/sub1sub2", conf); //try renaming from a non-existing snapshot - FsShellRun("-renameSnapshot /sub1 sn.nonexist sn.rename", 1, - "renameSnapshot: The snapshot sn.nonexist does not exist for directory /sub1"); + DFSTestUtil.FsShellRun("-renameSnapshot /sub1 sn.nonexist sn.rename", 1, + "renameSnapshot: The snapshot sn.nonexist does not exist for directory /sub1", conf); //try renaming to existing snapshots - FsShellRun("-createSnapshot /sub1 sn.new"); - FsShellRun("-renameSnapshot /sub1 sn.new sn.rename", 1, - "renameSnapshot: 
The snapshot sn.rename already exists for directory /sub1"); - FsShellRun("-renameSnapshot /sub1 sn.rename sn.new", 1, - "renameSnapshot: The snapshot sn.new already exists for directory /sub1"); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn.new", conf); + DFSTestUtil.FsShellRun("-renameSnapshot /sub1 sn.new sn.rename", 1, + "renameSnapshot: The snapshot sn.rename already exists for directory /sub1", conf); + DFSTestUtil.FsShellRun("-renameSnapshot /sub1 sn.rename sn.new", 1, + "renameSnapshot: The snapshot sn.new already exists for directory /sub1", conf); } @Test public void testDeleteSnapshot() throws Exception { - FsShellRun("-createSnapshot /sub1 sn1"); - FsShellRun("-deleteSnapshot /sub1 sn1"); - FsShellRun("-deleteSnapshot /sub1 sn1", 1, - "deleteSnapshot: Cannot delete snapshot sn1 from path /sub1: the snapshot does not exist."); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn1", conf); + DFSTestUtil.FsShellRun("-deleteSnapshot /sub1 sn1", conf); + DFSTestUtil.FsShellRun("-deleteSnapshot /sub1 sn1", 1, + "deleteSnapshot: Cannot delete snapshot sn1 from path /sub1: the snapshot does not exist.", conf); } @Test public void testDisallowSnapshot() throws Exception { - FsShellRun("-createSnapshot /sub1 sn1"); + DFSTestUtil.FsShellRun("-createSnapshot /sub1 sn1", conf); // cannot delete snapshotable dir - FsShellRun("-rmr /sub1", 1, "The directory /sub1 cannot be deleted since /sub1 is snapshottable and already has snapshots"); - DFSAdminRun("-disallowSnapshot /sub1", -1, - "disallowSnapshot: The directory /sub1 has snapshot(s). 
Please redo the operation after removing all the snapshots."); - FsShellRun("-deleteSnapshot /sub1 sn1"); - DFSAdminRun("-disallowSnapshot /sub1", 0, "Disallowing snaphot on /sub1 succeeded"); + DFSTestUtil.FsShellRun("-rmr /sub1", 1, "The directory /sub1 cannot be deleted since /sub1 is snapshottable and already has snapshots", conf); + DFSTestUtil.DFSAdminRun("-disallowSnapshot /sub1", -1, + "disallowSnapshot: The directory /sub1 has snapshot(s). Please redo the operation after removing all the snapshots.", conf); + DFSTestUtil.FsShellRun("-deleteSnapshot /sub1 sn1", conf); + DFSTestUtil.DFSAdminRun("-disallowSnapshot /sub1", 0, "Disallowing snaphot on /sub1 succeeded", conf); // Idempotent test - DFSAdminRun("-disallowSnapshot /sub1", 0, "Disallowing snaphot on /sub1 succeeded"); + DFSTestUtil.DFSAdminRun("-disallowSnapshot /sub1", 0, "Disallowing snaphot on /sub1 succeeded", conf); // now it can be deleted - FsShellRun("-rmr /sub1"); + DFSTestUtil.FsShellRun("-rmr /sub1", conf); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestStoragePolicyCommands.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestStoragePolicyCommands.java new file mode 100644 index 0000000000000..d6ead09376410 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestStoragePolicyCommands.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test storage policy related DFSAdmin commands + */ +public class TestStoragePolicyCommands { + private static final short REPL = 1; + private static final int SIZE = 128; + + private static Configuration conf; + private static MiniDFSCluster cluster; + private static DistributedFileSystem fs; + + @Before + public void clusterSetUp() throws IOException { + conf = new HdfsConfiguration(); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPL).build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + } + + @After + public void clusterShutdown() throws IOException{ + if(fs != null){ + fs.close(); + } + if(cluster != null){ + cluster.shutdown(); + } + } + + @Test + public void testSetAndGetStoragePolicy() throws Exception { + final Path foo = new Path("/foo"); + final Path bar = new Path(foo, "bar"); + DFSTestUtil.createFile(fs, bar, SIZE, REPL, 0); + + DFSTestUtil.DFSAdminRun("-setStoragePolicy /foo WARM", 0, + "Set storage policy WARM on " + foo.toString(), conf); + DFSTestUtil.DFSAdminRun("-setStoragePolicy /foo/bar COLD", 0, + "Set storage policy COLD on " + bar.toString(), conf); + DFSTestUtil.DFSAdminRun("-setStoragePolicy /fooz WARM", -1, + "File/Directory does not exist: /fooz", conf); + + final BlockStoragePolicy.Suite suite = BlockStoragePolicy + .readBlockStorageSuite(conf); + final 
BlockStoragePolicy warm = suite.getPolicy("WARM"); + final BlockStoragePolicy cold = suite.getPolicy("COLD"); + DFSTestUtil.DFSAdminRun("-getStoragePolicy /foo", 0, + "The storage policy of " + foo.toString() + ":\n" + warm, conf); + DFSTestUtil.DFSAdminRun("-getStoragePolicy /foo/bar", 0, + "The storage policy of " + bar.toString() + ":\n" + cold, conf); + DFSTestUtil.DFSAdminRun("-getStoragePolicy /fooz", -1, + "File/Directory does not exist: /fooz", conf); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java index 7602f44b0b054..0d860b4f064eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hdfs.protocol.datatransfer.sasl; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATA_TRANSFER_PROTECTION_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTP_POLICY_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.IGNORE_SECURE_PORTS_FOR_TESTING_KEY; + import static org.junit.Assert.*; import java.io.IOException; @@ -29,11 +32,13 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.io.IOUtils; import org.junit.After; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.junit.rules.Timeout; public class TestSaslDataTransfer extends SaslDataTransferTestCase { @@ -49,6 +54,9 @@ public class TestSaslDataTransfer extends SaslDataTransferTestCase { @Rule public ExpectedException exception = 
ExpectedException.none(); + @Rule + public Timeout timeout = new Timeout(60000); + @After public void shutdown() { IOUtils.cleanup(null, fs); @@ -98,17 +106,6 @@ public void testClientAndServerDoNotHaveCommonQop() throws Exception { doTest(clientConf); } - @Test - public void testClientSaslNoServerSasl() throws Exception { - HdfsConfiguration clusterConf = createSecureConfig(""); - startCluster(clusterConf); - HdfsConfiguration clientConf = new HdfsConfiguration(clusterConf); - clientConf.set(DFS_DATA_TRANSFER_PROTECTION_KEY, "authentication"); - exception.expect(IOException.class); - exception.expectMessage("could only be replicated to 0 nodes"); - doTest(clientConf); - } - @Test public void testServerSaslNoClientSasl() throws Exception { HdfsConfiguration clusterConf = createSecureConfig( @@ -121,6 +118,32 @@ public void testServerSaslNoClientSasl() throws Exception { doTest(clientConf); } + @Test + public void testDataNodeAbortsIfNoSasl() throws Exception { + HdfsConfiguration clusterConf = createSecureConfig(""); + exception.expect(RuntimeException.class); + exception.expectMessage("Cannot start secure DataNode"); + startCluster(clusterConf); + } + + @Test + public void testDataNodeAbortsIfNotHttpsOnly() throws Exception { + HdfsConfiguration clusterConf = createSecureConfig("authentication"); + clusterConf.set(DFS_HTTP_POLICY_KEY, + HttpConfig.Policy.HTTP_AND_HTTPS.name()); + exception.expect(RuntimeException.class); + exception.expectMessage("Cannot start secure DataNode"); + startCluster(clusterConf); + } + + @Test + public void testNoSaslAndSecurePortsIgnored() throws Exception { + HdfsConfiguration clusterConf = createSecureConfig(""); + clusterConf.setBoolean(IGNORE_SECURE_PORTS_FOR_TESTING_KEY, true); + startCluster(clusterConf); + doTest(clusterConf); + } + /** * Tests DataTransferProtocol with the given client configuration. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index a7ee927eefc69..1bf3add16de91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -287,7 +287,7 @@ public void testFailToStartWithBadConfig() throws Exception { // Directory which cannot be created conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, Shell.WINDOWS ? "\\\\cannotBeCreated" : "/proc/does-not-exist"); - assertJNFailsToStart(conf, "Can not create directory"); + assertJNFailsToStart(conf, "Cannot create directory"); } private static void assertJNFailsToStart(Configuration conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 72597d2b75926..dbc3212a22bf7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -89,7 +89,14 @@ public class TestBalancer { private static final Random r = new Random(); static { + initTestSetup(); + } + + public static void initTestSetup() { Dispatcher.setBlockMoveWaitTime(1000L) ; + + // do not create id file since it occupies the disk space + NameNodeConnector.setWrite2IdFile(false); } static void initConf(Configuration conf) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java index d9d70d1a3ceb7..bd9136655f63a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -44,7 +44,7 @@ public class TestBalancerWithHANameNodes { ClientProtocol client; static { - Dispatcher.setBlockMoveWaitTime(1000L); + TestBalancer.initTestSetup(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index a16a979100909..6ee6e545416d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -73,7 +73,7 @@ public class TestBalancerWithMultipleNameNodes { private static final Random RANDOM = new Random(); static { - Dispatcher.setBlockMoveWaitTime(1000L) ; + TestBalancer.initTestSetup(); } /** Common objects used in various methods. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java index 9961a2e2704be..7af3a0e7d7d69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java @@ -75,7 +75,7 @@ public class TestBalancerWithNodeGroup { static final int DEFAULT_BLOCK_SIZE = 100; static { - Dispatcher.setBlockMoveWaitTime(1000L) ; + TestBalancer.initTestSetup(); } static Configuration createConf() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index 1a8262fb6c4c5..7c0623cd46832 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; @@ -52,6 +53,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetworkTopology; +import org.junit.Assert; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Before; import org.junit.Test; @@ -607,7 +609,6 @@ public void testSafeModeIBRAfterIncremental() 
throws Exception { assertEquals(1, ds.getBlockReportCount()); } - /** * Tests that a namenode doesn't choose a datanode with full disks to * store blocks. @@ -654,5 +655,20 @@ public void testStorageWithRemainingCapacity() throws Exception { cluster.shutdown(); } } -} + @Test + public void testUseDelHint() { + DatanodeStorageInfo delHint = new DatanodeStorageInfo( + DFSTestUtil.getLocalDatanodeDescriptor(), new DatanodeStorage("id")); + List moreThan1Racks = Arrays.asList(delHint); + List excessTypes = new ArrayList(); + + excessTypes.add(StorageType.DEFAULT); + Assert.assertTrue(BlockManager.useDelHint(true, delHint, null, + moreThan1Racks, excessTypes)); + excessTypes.remove(0); + excessTypes.add(StorageType.SSD); + Assert.assertFalse(BlockManager.useDelHint(true, delHint, null, + moreThan1Racks, excessTypes)); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index e575ceeb7abb9..b8f358f28bb96 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; @@ -47,13 +48,14 @@ import org.apache.hadoop.hdfs.LogVerificationAppender; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.TestBlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.Block; import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.StatefulBlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; -import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.StatefulBlockInfo; import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; @@ -228,7 +230,7 @@ private static DatanodeStorageInfo[] chooseTarget( List chosenNodes, Set excludedNodes) { return replicator.chooseTarget(filename, numOfReplicas, writer, chosenNodes, - false, excludedNodes, BLOCK_SIZE, StorageType.DEFAULT); + false, excludedNodes, BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); } /** @@ -295,7 +297,7 @@ public void testChooseTarget2() throws Exception { excludedNodes.add(dataNodes[1]); chosenNodes.add(storages[2]); targets = replicator.chooseTarget(filename, 1, dataNodes[0], chosenNodes, true, - excludedNodes, BLOCK_SIZE, StorageType.DEFAULT); + excludedNodes, BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); System.out.println("targets=" + Arrays.asList(targets)); assertEquals(2, targets.length); //make sure that the chosen node is in the target. 
@@ -630,7 +632,7 @@ public void testChooseTargetWithMoreThanHalfStaleNodes() throws Exception { .getNamesystem().getBlockManager().getBlockPlacementPolicy(); DatanodeStorageInfo[] targets = replicator.chooseTarget(filename, 3, staleNodeInfo, new ArrayList(), false, null, - BLOCK_SIZE, StorageType.DEFAULT); + BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); assertEquals(targets.length, 3); assertFalse(isOnSameRack(targets[0], staleNodeInfo)); @@ -656,7 +658,7 @@ public void testChooseTargetWithMoreThanHalfStaleNodes() throws Exception { // Call chooseTarget targets = replicator.chooseTarget(filename, 3, staleNodeInfo, new ArrayList(), false, null, BLOCK_SIZE, - StorageType.DEFAULT); + TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); assertEquals(targets.length, 3); assertTrue(isOnSameRack(targets[0], staleNodeInfo)); @@ -933,8 +935,16 @@ public void testChooseReplicaToDelete() throws Exception { // replica nodes, while storages[2] and dataNodes[5] are in second set. assertEquals(2, first.size()); assertEquals(2, second.size()); + List excessTypes = new ArrayList(); + { + // test returning null + excessTypes.add(StorageType.SSD); + assertNull(replicator.chooseReplicaToDelete( + null, null, (short)3, first, second, excessTypes)); + } + excessTypes.add(StorageType.DEFAULT); DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete( - null, null, (short)3, first, second); + null, null, (short)3, first, second, excessTypes); // Within first set, storages[1] with less free space assertEquals(chosen, storages[1]); @@ -942,11 +952,12 @@ public void testChooseReplicaToDelete() throws Exception { assertEquals(0, first.size()); assertEquals(3, second.size()); // Within second set, storages[5] with less free space + excessTypes.add(StorageType.DEFAULT); chosen = replicator.chooseReplicaToDelete( - null, null, (short)2, first, second); + null, null, (short)2, first, second, excessTypes); assertEquals(chosen, storages[5]); } - + /** * This testcase tests whether the 
default value returned by * DFSUtil.getInvalidateWorkPctPerIteration() is positive, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java index bf972c03ca768..0273da081f453 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java @@ -17,17 +17,28 @@ */ package org.apache.hadoop.hdfs.server.blockmanagement; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.TestBlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; -import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NodeType; +import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; @@ -37,17 +48,6 @@ import org.junit.BeforeClass; import org.junit.Test; -import java.io.File; 
-import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - public class TestReplicationPolicyConsiderLoad { private static NameNode namenode; @@ -146,7 +146,7 @@ public void testChooseTargetWithDecomNodes() throws IOException { DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager() .getBlockPlacementPolicy().chooseTarget("testFile.txt", 3, dataNodes[0], new ArrayList(), false, null, - 1024, StorageType.DEFAULT); + 1024, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); assertEquals(3, targets.length); Set targetSet = new HashSet( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java index b615876b62bb9..526c490422e6c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyWithNodeGroup.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.TestBlockStoragePolicy; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.net.NetworkTopology; @@ -258,7 +259,7 @@ private DatanodeStorageInfo[] chooseTarget( List chosenNodes, Set excludedNodes) { return replicator.chooseTarget(filename, numOfReplicas, writer, chosenNodes, - false, excludedNodes, BLOCK_SIZE, StorageType.DEFAULT); + false, excludedNodes, BLOCK_SIZE, 
TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); } /** @@ -340,7 +341,7 @@ public void testChooseTarget2() throws Exception { Set excludedNodes = new HashSet(); excludedNodes.add(dataNodes[1]); targets = repl.chooseTarget(filename, 4, dataNodes[0], chosenNodes, false, - excludedNodes, BLOCK_SIZE, StorageType.DEFAULT); + excludedNodes, BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); assertEquals(targets.length, 4); assertEquals(storages[0], targets[0]); @@ -358,7 +359,7 @@ public void testChooseTarget2() throws Exception { excludedNodes.add(dataNodes[1]); chosenNodes.add(storages[2]); targets = repl.chooseTarget(filename, 1, dataNodes[0], chosenNodes, true, - excludedNodes, BLOCK_SIZE, StorageType.DEFAULT); + excludedNodes, BLOCK_SIZE, TestBlockStoragePolicy.DEFAULT_STORAGE_POLICY); System.out.println("targets=" + Arrays.asList(targets)); assertEquals(2, targets.length); //make sure that the chosen node is in the target. @@ -612,8 +613,10 @@ public void testChooseReplicaToDelete() throws Exception { replicaList, rackMap, first, second); assertEquals(3, first.size()); assertEquals(1, second.size()); + List excessTypes = new ArrayList(); + excessTypes.add(StorageType.DEFAULT); DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete( - null, null, (short)3, first, second); + null, null, (short)3, first, second, excessTypes); // Within first set {dataNodes[0], dataNodes[1], dataNodes[2]}, // dataNodes[0] and dataNodes[1] are in the same nodegroup, // but dataNodes[1] is chosen as less free space @@ -624,16 +627,18 @@ public void testChooseReplicaToDelete() throws Exception { assertEquals(1, second.size()); // Within first set {dataNodes[0], dataNodes[2]}, dataNodes[2] is chosen // as less free space + excessTypes.add(StorageType.DEFAULT); chosen = replicator.chooseReplicaToDelete( - null, null, (short)2, first, second); + null, null, (short)2, first, second, excessTypes); assertEquals(chosen, storages[2]); replicator.adjustSetsWithChosenReplica(rackMap, 
first, second, chosen); assertEquals(0, first.size()); assertEquals(2, second.size()); // Within second set, dataNodes[5] with less free space + excessTypes.add(StorageType.DEFAULT); chosen = replicator.chooseReplicaToDelete( - null, null, (short)1, first, second); + null, null, (short)1, first, second, excessTypes); assertEquals(chosen, storages[5]); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java index d0fad6e2b8548..83d93f0dac45e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java @@ -1093,7 +1093,8 @@ public List getVolumes() { } @Override - public void addVolumes(Collection volumes) { + public List addVolumes(List volumes, + final Collection bpids) { throw new UnsupportedOperationException(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java new file mode 100644 index 0000000000000..f6e984b7ed0b4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeHotSwapVolumes.java @@ -0,0 +1,481 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.datanode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.ReconfigurationException; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.BlockMissingException; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.After; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.channels.FileChannel; +import java.nio.channels.FileLock; +import java.nio.channels.OverlappingFileLockException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeoutException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; +import static org.junit.Assert.assertEquals; 
+import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +public class TestDataNodeHotSwapVolumes { + private static final Log LOG = LogFactory.getLog( + TestDataNodeHotSwapVolumes.class); + private static final int BLOCK_SIZE = 512; + private MiniDFSCluster cluster; + + @After + public void tearDown() { + shutdown(); + } + + private void startDFSCluster(int numNameNodes, int numDataNodes) + throws IOException { + shutdown(); + Configuration conf = new Configuration(); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + + /* + * Lower the DN heartbeat, DF rate, and recheck interval to one second + * so state about failures and datanode death propagates faster. + */ + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_DF_INTERVAL_KEY, 1000); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, + 1000); + + MiniDFSNNTopology nnTopology = + MiniDFSNNTopology.simpleFederatedTopology(numNameNodes); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(nnTopology) + .numDataNodes(numDataNodes) + .build(); + cluster.waitActive(); + } + + private void shutdown() { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + } + + private void createFile(Path path, int numBlocks) + throws IOException, InterruptedException, TimeoutException { + final short replicateFactor = 1; + createFile(path, numBlocks, replicateFactor); + } + + private void createFile(Path path, int numBlocks, short replicateFactor) + throws IOException, InterruptedException, TimeoutException { + createFile(0, path, numBlocks, replicateFactor); + } + + private void createFile(int fsIdx, Path path, int numBlocks) + throws IOException, InterruptedException, TimeoutException { + final short replicateFactor = 1; + createFile(fsIdx, path, numBlocks, replicateFactor); + } + + private void createFile(int 
fsIdx, Path path, int numBlocks, + short replicateFactor) + throws IOException, TimeoutException, InterruptedException { + final int seed = 0; + final DistributedFileSystem fs = cluster.getFileSystem(fsIdx); + DFSTestUtil.createFile(fs, path, BLOCK_SIZE * numBlocks, + replicateFactor, seed); + DFSTestUtil.waitReplication(fs, path, replicateFactor); + } + + /** + * Verify whether a file has enough content. + */ + private static void verifyFileLength(FileSystem fs, Path path, int numBlocks) + throws IOException { + FileStatus status = fs.getFileStatus(path); + assertEquals(numBlocks * BLOCK_SIZE, status.getLen()); + } + + /** Return the number of replicas for a given block in the file. */ + private static int getNumReplicas(FileSystem fs, Path file, + int blockIdx) throws IOException { + BlockLocation locs[] = fs.getFileBlockLocations(file, 0, Long.MAX_VALUE); + return locs.length < blockIdx + 1 ? 0 : locs[blockIdx].getNames().length; + } + + /** + * Wait the block to have the exact number of replicas as expected. + */ + private static void waitReplication(FileSystem fs, Path file, int blockIdx, + int numReplicas) + throws IOException, TimeoutException, InterruptedException { + int attempts = 50; // Wait 5 seconds. + while (attempts > 0) { + if (getNumReplicas(fs, file, blockIdx) == numReplicas) { + return; + } + Thread.sleep(100); + attempts--; + } + throw new TimeoutException("Timed out waiting the " + blockIdx + "-th block" + + " of " + file + " to have " + numReplicas + " replicas."); + } + + /** Parses data dirs from DataNode's configuration. 
*/ + private static Collection getDataDirs(DataNode datanode) { + return datanode.getConf().getTrimmedStringCollection( + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY); + } + + @Test + public void testParseChangedVolumes() throws IOException { + startDFSCluster(1, 1); + DataNode dn = cluster.getDataNodes().get(0); + Configuration conf = dn.getConf(); + + String oldPaths = conf.get(DFS_DATANODE_DATA_DIR_KEY); + List oldLocations = new ArrayList(); + for (String path : oldPaths.split(",")) { + oldLocations.add(StorageLocation.parse(path)); + } + assertFalse(oldLocations.isEmpty()); + + String newPaths = "/foo/path1,/foo/path2"; + conf.set(DFS_DATANODE_DATA_DIR_KEY, newPaths); + + DataNode.ChangedVolumes changedVolumes =dn.parseChangedVolumes(); + List newVolumes = changedVolumes.newLocations; + assertEquals(2, newVolumes.size()); + assertEquals(new File("/foo/path1").getAbsolutePath(), + newVolumes.get(0).getFile().getAbsolutePath()); + assertEquals(new File("/foo/path2").getAbsolutePath(), + newVolumes.get(1).getFile().getAbsolutePath()); + + List removedVolumes = changedVolumes.deactivateLocations; + assertEquals(oldLocations.size(), removedVolumes.size()); + for (int i = 0; i < removedVolumes.size(); i++) { + assertEquals(oldLocations.get(i).getFile(), + removedVolumes.get(i).getFile()); + } + } + + @Test + public void testParseChangedVolumesFailures() throws IOException { + startDFSCluster(1, 1); + DataNode dn = cluster.getDataNodes().get(0); + Configuration conf = dn.getConf(); + try { + conf.set(DFS_DATANODE_DATA_DIR_KEY, ""); + dn.parseChangedVolumes(); + fail("Should throw IOException: empty inputs."); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains("No directory is specified.", e); + } + } + + /** Add volumes to the first DataNode. */ + private void addVolumes(int numNewVolumes) throws ReconfigurationException { + File dataDir = new File(cluster.getDataDirectory()); + DataNode dn = cluster.getDataNodes().get(0); // First DataNode. 
+ Configuration conf = dn.getConf(); + String oldDataDir = conf.get(DFS_DATANODE_DATA_DIR_KEY); + + List newVolumeDirs = new ArrayList(); + StringBuilder newDataDirBuf = new StringBuilder(oldDataDir); + int startIdx = oldDataDir.split(",").length + 1; + // Find the first available (non-taken) directory name for data volume. + while (true) { + File volumeDir = new File(dataDir, "data" + startIdx); + if (!volumeDir.exists()) { + break; + } + startIdx++; + } + for (int i = startIdx; i < startIdx + numNewVolumes; i++) { + File volumeDir = new File(dataDir, "data" + String.valueOf(i)); + newVolumeDirs.add(volumeDir); + volumeDir.mkdirs(); + newDataDirBuf.append(","); + newDataDirBuf.append(volumeDir.toURI()); + } + + String newDataDir = newDataDirBuf.toString(); + dn.reconfigurePropertyImpl(DFS_DATANODE_DATA_DIR_KEY, newDataDir); + assertEquals(newDataDir, conf.get(DFS_DATANODE_DATA_DIR_KEY)); + + // Check that all newly created volumes are appropriately formatted. + for (File volumeDir : newVolumeDirs) { + File curDir = new File(volumeDir, "current"); + assertTrue(curDir.exists()); + assertTrue(curDir.isDirectory()); + } + } + + private List> getNumBlocksReport(int namesystemIdx) { + List> results = new ArrayList>(); + final String bpid = cluster.getNamesystem(namesystemIdx).getBlockPoolId(); + List> blockReports = + cluster.getAllBlockReports(bpid); + for (Map datanodeReport : blockReports) { + List numBlocksPerDN = new ArrayList(); + for (BlockListAsLongs blocks : datanodeReport.values()) { + numBlocksPerDN.add(blocks.getNumberOfBlocks()); + } + results.add(numBlocksPerDN); + } + return results; + } + + /** + * Test adding one volume on a running MiniDFSCluster with only one NameNode. 
+ */ + @Test + public void testAddOneNewVolume() + throws IOException, ReconfigurationException, + InterruptedException, TimeoutException { + startDFSCluster(1, 1); + String bpid = cluster.getNamesystem().getBlockPoolId(); + final int numBlocks = 10; + + addVolumes(1); + + Path testFile = new Path("/test"); + createFile(testFile, numBlocks); + + List> blockReports = + cluster.getAllBlockReports(bpid); + assertEquals(1, blockReports.size()); // 1 DataNode + assertEquals(3, blockReports.get(0).size()); // 3 volumes + + // FSVolumeList uses Round-Robin block chooser by default. Thus the new + // blocks should be evenly located in all volumes. + int minNumBlocks = Integer.MAX_VALUE; + int maxNumBlocks = Integer.MIN_VALUE; + for (BlockListAsLongs blockList : blockReports.get(0).values()) { + minNumBlocks = Math.min(minNumBlocks, blockList.getNumberOfBlocks()); + maxNumBlocks = Math.max(maxNumBlocks, blockList.getNumberOfBlocks()); + } + assertTrue(Math.abs(maxNumBlocks - minNumBlocks) <= 1); + verifyFileLength(cluster.getFileSystem(), testFile, numBlocks); + } + + @Test(timeout = 60000) + public void testAddVolumesDuringWrite() + throws IOException, InterruptedException, TimeoutException, + ReconfigurationException { + startDFSCluster(1, 1); + String bpid = cluster.getNamesystem().getBlockPoolId(); + Path testFile = new Path("/test"); + createFile(testFile, 4); // Each volume has 2 blocks. + + addVolumes(2); + + // Continue to write the same file, thus the new volumes will have blocks. + DFSTestUtil.appendFile(cluster.getFileSystem(), testFile, BLOCK_SIZE * 8); + verifyFileLength(cluster.getFileSystem(), testFile, 8 + 4); + // After appending data, there should be [2, 2, 4, 4] blocks in each volume + // respectively.
+ List expectedNumBlocks = Arrays.asList(2, 2, 4, 4); + + List> blockReports = + cluster.getAllBlockReports(bpid); + assertEquals(1, blockReports.size()); // 1 DataNode + assertEquals(4, blockReports.get(0).size()); // 4 volumes + Map dnReport = + blockReports.get(0); + List actualNumBlocks = new ArrayList(); + for (BlockListAsLongs blockList : dnReport.values()) { + actualNumBlocks.add(blockList.getNumberOfBlocks()); + } + Collections.sort(actualNumBlocks); + assertEquals(expectedNumBlocks, actualNumBlocks); + } + + @Test + public void testAddVolumesToFederationNN() + throws IOException, TimeoutException, InterruptedException, + ReconfigurationException { + // Starts a Cluster with 2 NameNode and 3 DataNodes. Each DataNode has 2 + // volumes. + final int numNameNodes = 2; + final int numDataNodes = 1; + startDFSCluster(numNameNodes, numDataNodes); + Path testFile = new Path("/test"); + // Create a file on the first namespace with 4 blocks. + createFile(0, testFile, 4); + // Create a file on the second namespace with 4 blocks. + createFile(1, testFile, 4); + + // Add 2 volumes to the first DataNode. + final int numNewVolumes = 2; + addVolumes(numNewVolumes); + + // Append to the file on the first namespace. + DFSTestUtil.appendFile(cluster.getFileSystem(0), testFile, BLOCK_SIZE * 8); + + List> actualNumBlocks = getNumBlocksReport(0); + assertEquals(cluster.getDataNodes().size(), actualNumBlocks.size()); + List blocksOnFirstDN = actualNumBlocks.get(0); + Collections.sort(blocksOnFirstDN); + assertEquals(Arrays.asList(2, 2, 4, 4), blocksOnFirstDN); + + // Verify the second namespace also has the new volumes and they are empty. 
+ actualNumBlocks = getNumBlocksReport(1); + assertEquals(4, actualNumBlocks.get(0).size()); + assertEquals(numNewVolumes, + Collections.frequency(actualNumBlocks.get(0), 0)); + } + + @Test + public void testRemoveOneVolume() + throws ReconfigurationException, InterruptedException, TimeoutException, + IOException { + startDFSCluster(1, 1); + final short replFactor = 1; + Path testFile = new Path("/test"); + createFile(testFile, 10, replFactor); + + DataNode dn = cluster.getDataNodes().get(0); + Collection oldDirs = getDataDirs(dn); + String newDirs = oldDirs.iterator().next(); // Keep the first volume. + dn.reconfigurePropertyImpl( + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs); + assertFileLocksReleased( + new ArrayList(oldDirs).subList(1, oldDirs.size())); + dn.scheduleAllBlockReport(0); + + try { + DFSTestUtil.readFile(cluster.getFileSystem(), testFile); + fail("Expect to throw BlockMissingException."); + } catch (BlockMissingException e) { + GenericTestUtils.assertExceptionContains("Could not obtain block", e); + } + + Path newFile = new Path("/newFile"); + createFile(newFile, 6); + + String bpid = cluster.getNamesystem().getBlockPoolId(); + List> blockReports = + cluster.getAllBlockReports(bpid); + assertEquals((int)replFactor, blockReports.size()); + + BlockListAsLongs blocksForVolume1 = + blockReports.get(0).values().iterator().next(); + // The first volume has half of the testFile and full of newFile. 
+ assertEquals(10 / 2 + 6, blocksForVolume1.getNumberOfBlocks()); + } + + @Test + public void testReplicatingAfterRemoveVolume() + throws InterruptedException, TimeoutException, IOException, + ReconfigurationException { + startDFSCluster(1, 2); + final DistributedFileSystem fs = cluster.getFileSystem(); + final short replFactor = 2; + Path testFile = new Path("/test"); + createFile(testFile, 4, replFactor); + + DataNode dn = cluster.getDataNodes().get(0); + Collection oldDirs = getDataDirs(dn); + String newDirs = oldDirs.iterator().next(); // Keep the first volume. + dn.reconfigurePropertyImpl( + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs); + assertFileLocksReleased( + new ArrayList(oldDirs).subList(1, oldDirs.size())); + + // Force DataNode to report missing blocks. + dn.scheduleAllBlockReport(0); + DataNodeTestUtils.triggerDeletionReport(dn); + + // The 2nd block only has 1 replica due to the removed data volume. + waitReplication(fs, testFile, 1, 1); + + // Wait NameNode to replica missing blocks. + DFSTestUtil.waitReplication(fs, testFile, replFactor); + } + + /** + * Asserts that the storage lock file in each given directory has been + * released. This method works by trying to acquire the lock file itself. If + * locking fails here, then the main code must have failed to release it. 
+ * + * @param dirs every storage directory to check + * @throws IOException if there is an unexpected I/O error + */ + private static void assertFileLocksReleased(Collection dirs) + throws IOException { + for (String dir: dirs) { + StorageLocation sl = StorageLocation.parse(dir); + File lockFile = new File(sl.getFile(), Storage.STORAGE_FILE_LOCK); + RandomAccessFile raf = null; + FileChannel channel = null; + FileLock lock = null; + try { + raf = new RandomAccessFile(lockFile, "rws"); + channel = raf.getChannel(); + lock = channel.tryLock(); + assertNotNull(String.format( + "Lock file at %s appears to be held by a different process.", + lockFile.getAbsolutePath()), lock); + } catch (OverlappingFileLockException e) { + fail(String.format("Must release lock file at %s.", + lockFile.getAbsolutePath())); + } finally { + if (lock != null) { + try { + lock.release(); + } catch (IOException e) { + LOG.warn(String.format("I/O error releasing file lock %s.", + lockFile.getAbsolutePath()), e); + } + } + IOUtils.cleanup(null, channel, raf); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java index 1d700e3cdfcb0..ed322437da9d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataStorage.java @@ -63,6 +63,7 @@ public void setUp() throws IOException { @After public void tearDown() throws IOException { + storage.unlockAll(); FileUtil.fullyDelete(TEST_DIR); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCacheRevocation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCacheRevocation.java index 
af28ed70652bd..d5531db8e1e25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCacheRevocation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestFsDatasetCacheRevocation.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import static org.junit.Assume.assumeTrue; + import java.io.File; import java.nio.ByteBuffer; import java.util.EnumSet; @@ -41,6 +43,7 @@ import org.apache.hadoop.io.nativeio.NativeIO.POSIX.NoMlockCacheManipulator; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.apache.hadoop.util.NativeCodeLoader; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -94,6 +97,7 @@ private static Configuration getDefaultConf() { */ @Test(timeout=120000) public void testPinning() throws Exception { + assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS); Configuration conf = getDefaultConf(); // Set a really long revocation timeout, so that we won't reach it during // this test. 
@@ -143,6 +147,7 @@ public void testPinning() throws Exception { */ @Test(timeout=120000) public void testRevocation() throws Exception { + assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS); BlockReaderTestUtil.enableHdfsCachingTracing(); BlockReaderTestUtil.enableShortCircuitShmTracing(); Configuration conf = getDefaultConf(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java index 2c4c401205ec6..10b9f7e30a9f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestFsDatasetImpl.java @@ -40,7 +40,10 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; @@ -117,6 +120,7 @@ public void testAddVolumes() throws IOException { final int numExistingVolumes = dataset.getVolumes().size(); final int totalVolumes = numNewVolumes + numExistingVolumes; List newLocations = new ArrayList(); + Set expectedVolumes = new HashSet(); for (int i = 0; i < numNewVolumes; i++) { String path = BASE_DIR + "/newData" + i; newLocations.add(StorageLocation.parse(path)); @@ -125,13 +129,15 @@ public void testAddVolumes() throws IOException { } when(storage.getNumStorageDirs()).thenReturn(totalVolumes); - dataset.addVolumes(newLocations); + dataset.addVolumes(newLocations, Arrays.asList(BLOCK_POOL_IDS)); assertEquals(totalVolumes, dataset.getVolumes().size()); assertEquals(totalVolumes, dataset.storageMap.size()); + + Set actualVolumes = new HashSet(); for (int i = 0; i < 
numNewVolumes; i++) { - assertEquals(newLocations.get(i).getFile().getPath(), - dataset.getVolumes().get(numExistingVolumes + i).getBasePath()); + dataset.getVolumes().get(numExistingVolumes + i).getBasePath(); } + assertEquals(actualVolumes, expectedVolumes); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java index e6a03d231ede0..a870aa90da0d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java @@ -111,7 +111,7 @@ public void testWriteToRbw() throws Exception { // test writeToTemporary @Test - public void testWriteToTempoary() throws Exception { + public void testWriteToTemporary() throws Exception { MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration()).build(); try { cluster.waitActive(); @@ -475,5 +475,28 @@ private void testWriteToTemporary(FsDatasetImpl dataSet, ExtendedBlock[] blocks) } dataSet.createTemporary(StorageType.DEFAULT, blocks[NON_EXISTENT]); + + try { + dataSet.createTemporary(StorageType.DEFAULT, blocks[NON_EXISTENT]); + Assert.fail("Should not have created a replica that had already been " + + "created " + blocks[NON_EXISTENT]); + } catch (Exception e) { + Assert.assertTrue( + e.getMessage().contains(blocks[NON_EXISTENT].getBlockName())); + Assert.assertTrue(e instanceof ReplicaAlreadyExistsException); + } + + long newGenStamp = blocks[NON_EXISTENT].getGenerationStamp() * 10; + blocks[NON_EXISTENT].setGenerationStamp(newGenStamp); + try { + ReplicaInPipeline replicaInfo = + dataSet.createTemporary(StorageType.DEFAULT, blocks[NON_EXISTENT]); + Assert.assertTrue(replicaInfo.getGenerationStamp() == 
newGenStamp); + Assert.assertTrue( + replicaInfo.getBlockId() == blocks[NON_EXISTENT].getBlockId()); + } catch (ReplicaAlreadyExistsException e) { + Assert.fail("createRbw() Should have removed the block with the older " + + "genstamp and replaced it with the newer one: " + blocks[NON_EXISTENT]); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java new file mode 100644 index 0000000000000..5866c7f738443 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.mover; + +import java.io.IOException; +import java.net.URI; +import java.util.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.*; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DBlock; +import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; +import org.apache.hadoop.hdfs.server.mover.Mover.MLocation; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.GenericOptionsParser; +import org.junit.Assert; +import org.junit.Test; + +public class TestMover { + static Mover newMover(Configuration conf) throws IOException { + final Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(1, namenodes.size()); + + final List nncs = NameNodeConnector.newNameNodeConnectors( + namenodes, Mover.class.getSimpleName(), Mover.MOVER_ID_PATH, conf); + return new Mover(nncs.get(0), conf); + } + + @Test + public void testScheduleSameBlock() throws IOException { + final Configuration conf = new HdfsConfiguration(); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(4).build(); + try { + cluster.waitActive(); + final DistributedFileSystem dfs = cluster.getFileSystem(); + final String file = "/testScheduleSameBlock/file"; + + { + final FSDataOutputStream out = dfs.create(new Path(file)); + out.writeChars("testScheduleSameBlock"); + out.close(); + } + + final Mover mover = newMover(conf); + mover.init(); + final Mover.Processor processor = mover.new Processor(); + + final LocatedBlock lb = dfs.getClient().getLocatedBlocks(file, 0).get(0); + final List locations = MLocation.toLocations(lb); + final MLocation ml = locations.get(0); + final DBlock db = mover.newDBlock(lb.getBlock().getLocalBlock(), locations); + + final 
List storageTypes = new ArrayList( + Arrays.asList(StorageType.DEFAULT, StorageType.DEFAULT)); + Assert.assertTrue(processor.scheduleMoveReplica(db, ml, storageTypes)); + Assert.assertFalse(processor.scheduleMoveReplica(db, ml, storageTypes)); + } finally { + cluster.shutdown(); + } + } + + private void checkMovePaths(List actual, Path... expected) { + Assert.assertEquals(expected.length, actual.size()); + for (Path p : expected) { + Assert.assertTrue(actual.contains(p)); + } + } + + /** + * Test Mover Cli by specifying a list of files/directories using option "-p". + * There is only one namenode (and hence name service) specified in the conf. + */ + @Test + public void testMoverCli() throws Exception { + final MiniDFSCluster cluster = new MiniDFSCluster + .Builder(new HdfsConfiguration()).numDataNodes(0).build(); + try { + final Configuration conf = cluster.getConfiguration(0); + try { + Mover.Cli.getNameNodePathsToMove(conf, "-p", "/foo", "bar"); + Assert.fail("Expected exception for illegal path bar"); + } catch (IllegalArgumentException e) { + GenericTestUtils.assertExceptionContains("bar is not absolute", e); + } + + Map> movePaths = Mover.Cli.getNameNodePathsToMove(conf); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(1, namenodes.size()); + Assert.assertEquals(1, movePaths.size()); + URI nn = namenodes.iterator().next(); + Assert.assertTrue(movePaths.containsKey(nn)); + Assert.assertNull(movePaths.get(nn)); + + movePaths = Mover.Cli.getNameNodePathsToMove(conf, "-p", "/foo", "/bar"); + namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(1, movePaths.size()); + nn = namenodes.iterator().next(); + Assert.assertTrue(movePaths.containsKey(nn)); + checkMovePaths(movePaths.get(nn), new Path("/foo"), new Path("/bar")); + } finally { + cluster.shutdown(); + } + } + + @Test + public void testMoverCliWithHAConf() throws Exception { + final Configuration conf = new HdfsConfiguration(); + final MiniDFSCluster cluster = 
new MiniDFSCluster + .Builder(new HdfsConfiguration()) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0).build(); + HATestUtil.setFailoverConfigurations(cluster, conf, "MyCluster"); + try { + Map> movePaths = Mover.Cli.getNameNodePathsToMove(conf, + "-p", "/foo", "/bar"); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(1, namenodes.size()); + Assert.assertEquals(1, movePaths.size()); + URI nn = namenodes.iterator().next(); + Assert.assertEquals(new URI("hdfs://MyCluster"), nn); + Assert.assertTrue(movePaths.containsKey(nn)); + checkMovePaths(movePaths.get(nn), new Path("/foo"), new Path("/bar")); + } finally { + cluster.shutdown(); + } + } + + @Test + public void testMoverCliWithFederation() throws Exception { + final MiniDFSCluster cluster = new MiniDFSCluster + .Builder(new HdfsConfiguration()) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3)) + .numDataNodes(0).build(); + final Configuration conf = new HdfsConfiguration(); + DFSTestUtil.setFederatedConfiguration(cluster, conf); + try { + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(3, namenodes.size()); + + try { + Mover.Cli.getNameNodePathsToMove(conf, "-p", "/foo"); + Assert.fail("Expect exception for missing authority information"); + } catch (IllegalArgumentException e) { + GenericTestUtils.assertExceptionContains( + "does not contain scheme and authority", e); + } + + try { + Mover.Cli.getNameNodePathsToMove(conf, "-p", "hdfs:///foo"); + Assert.fail("Expect exception for missing authority information"); + } catch (IllegalArgumentException e) { + GenericTestUtils.assertExceptionContains( + "does not contain scheme and authority", e); + } + + try { + Mover.Cli.getNameNodePathsToMove(conf, "-p", "wrong-hdfs://ns1/foo"); + Assert.fail("Expect exception for wrong scheme"); + } catch (IllegalArgumentException e) { + GenericTestUtils.assertExceptionContains("Cannot resolve the path", e); + } + + Iterator iter = 
namenodes.iterator(); + URI nn1 = iter.next(); + URI nn2 = iter.next(); + Map> movePaths = Mover.Cli.getNameNodePathsToMove(conf, + "-p", nn1 + "/foo", nn1 + "/bar", nn2 + "/foo/bar"); + Assert.assertEquals(2, movePaths.size()); + checkMovePaths(movePaths.get(nn1), new Path("/foo"), new Path("/bar")); + checkMovePaths(movePaths.get(nn2), new Path("/foo/bar")); + } finally { + cluster.shutdown(); + } + } + + @Test + public void testMoverCliWithFederationHA() throws Exception { + final MiniDFSCluster cluster = new MiniDFSCluster + .Builder(new HdfsConfiguration()) + .nnTopology(MiniDFSNNTopology.simpleHAFederatedTopology(3)) + .numDataNodes(0).build(); + final Configuration conf = new HdfsConfiguration(); + DFSTestUtil.setFederatedHAConfiguration(cluster, conf); + try { + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Assert.assertEquals(3, namenodes.size()); + + Iterator iter = namenodes.iterator(); + URI nn1 = iter.next(); + URI nn2 = iter.next(); + URI nn3 = iter.next(); + Map> movePaths = Mover.Cli.getNameNodePathsToMove(conf, + "-p", nn1 + "/foo", nn1 + "/bar", nn2 + "/foo/bar", nn3 + "/foobar"); + Assert.assertEquals(3, movePaths.size()); + checkMovePaths(movePaths.get(nn1), new Path("/foo"), new Path("/bar")); + checkMovePaths(movePaths.get(nn2), new Path("/foo/bar")); + checkMovePaths(movePaths.get(nn3), new Path("/foobar")); + } finally { + cluster.shutdown(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java new file mode 100644 index 0000000000000..e40f142e5aefe --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java @@ -0,0 +1,736 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.mover; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import com.google.common.base.Joiner; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.ReconfigurationException; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.BlockStoragePolicy; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSOutputStream; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.protocol.DirectoryListing; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; 
+import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; +import org.apache.hadoop.hdfs.server.balancer.Dispatcher; +import org.apache.hadoop.hdfs.server.balancer.ExitStatus; +import org.apache.hadoop.hdfs.server.balancer.TestBalancer; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.io.IOUtils; +import org.apache.log4j.Level; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; + +/** + * Test the data migration tool (for Archival Storage) + */ +public class TestStorageMover { + static final Log LOG = LogFactory.getLog(TestStorageMover.class); + static { + ((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class) + ).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(Dispatcher.class) + ).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(DataTransferProtocol.class)).getLogger() + .setLevel(Level.ALL); + } + + private static final int BLOCK_SIZE = 1024; + private static final short REPL = 3; + private static final int NUM_DATANODES = 6; + private static final Configuration DEFAULT_CONF = new HdfsConfiguration(); + private static final BlockStoragePolicy.Suite DEFAULT_POLICIES; + private static final BlockStoragePolicy HOT; + private static final BlockStoragePolicy WARM; + private static final BlockStoragePolicy COLD; + + static { + 
DEFAULT_CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + DEFAULT_CONF.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1L); + DEFAULT_CONF.setLong(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, + 2L); + DEFAULT_CONF.setLong(DFSConfigKeys.DFS_MOVER_MOVEDWINWIDTH_KEY, 2000L); + + DEFAULT_POLICIES = BlockStoragePolicy.readBlockStorageSuite(DEFAULT_CONF); + HOT = DEFAULT_POLICIES.getPolicy("HOT"); + WARM = DEFAULT_POLICIES.getPolicy("WARM"); + COLD = DEFAULT_POLICIES.getPolicy("COLD"); + TestBalancer.initTestSetup(); + Dispatcher.setDelayAfterErrors(1000L); + } + + /** + * This scheme defines files/directories and their block storage policies. It + * also defines snapshots. + */ + static class NamespaceScheme { + final List dirs; + final List files; + final long fileSize; + final Map> snapshotMap; + final Map policyMap; + + NamespaceScheme(List dirs, List files, long fileSize, + Map> snapshotMap, + Map policyMap) { + this.dirs = dirs == null? Collections.emptyList(): dirs; + this.files = files == null? Collections.emptyList(): files; + this.fileSize = fileSize; + this.snapshotMap = snapshotMap == null ? + Collections.>emptyMap() : snapshotMap; + this.policyMap = policyMap; + } + + /** + * Create files/directories/snapshots. + */ + void prepare(DistributedFileSystem dfs, short repl) throws Exception { + for (Path d : dirs) { + dfs.mkdirs(d); + } + for (Path file : files) { + DFSTestUtil.createFile(dfs, file, fileSize, repl, 0L); + } + for (Map.Entry> entry : snapshotMap.entrySet()) { + for (String snapshot : entry.getValue()) { + SnapshotTestHelper.createSnapshot(dfs, entry.getKey(), snapshot); + } + } + } + + /** + * Set storage policies according to the corresponding scheme. 
+ */ + void setStoragePolicy(DistributedFileSystem dfs) throws Exception { + for (Map.Entry entry : policyMap.entrySet()) { + dfs.setStoragePolicy(entry.getKey(), entry.getValue().getName()); + } + } + } + + /** + * This scheme defines DataNodes and their storage, including storage types + * and remaining capacities. + */ + static class ClusterScheme { + final Configuration conf; + final int numDataNodes; + final short repl; + final StorageType[][] storageTypes; + final long[][] storageCapacities; + + ClusterScheme() { + this(DEFAULT_CONF, NUM_DATANODES, REPL, + genStorageTypes(NUM_DATANODES), null); + } + + ClusterScheme(Configuration conf, int numDataNodes, short repl, + StorageType[][] types, long[][] capacities) { + Preconditions.checkArgument(types == null || types.length == numDataNodes); + Preconditions.checkArgument(capacities == null || capacities.length == + numDataNodes); + this.conf = conf; + this.numDataNodes = numDataNodes; + this.repl = repl; + this.storageTypes = types; + this.storageCapacities = capacities; + } + } + + class MigrationTest { + private final ClusterScheme clusterScheme; + private final NamespaceScheme nsScheme; + private final Configuration conf; + + private MiniDFSCluster cluster; + private DistributedFileSystem dfs; + private final BlockStoragePolicy.Suite policies; + + MigrationTest(ClusterScheme cScheme, NamespaceScheme nsScheme) { + this.clusterScheme = cScheme; + this.nsScheme = nsScheme; + this.conf = clusterScheme.conf; + this.policies = BlockStoragePolicy.readBlockStorageSuite(conf); + } + + /** + * Set up the cluster and start NameNode and DataNodes according to the + * corresponding scheme. 
+ */ + void setupCluster() throws Exception { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(clusterScheme + .numDataNodes).storageTypes(clusterScheme.storageTypes) + .storageCapacities(clusterScheme.storageCapacities).build(); + cluster.waitActive(); + dfs = cluster.getFileSystem(); + } + + private void runBasicTest(boolean shutdown) throws Exception { + setupCluster(); + try { + prepareNamespace(); + verify(true); + + setStoragePolicy(); + migrate(); + verify(true); + } finally { + if (shutdown) { + shutdownCluster(); + } + } + } + + void shutdownCluster() throws Exception { + IOUtils.cleanup(null, dfs); + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Create files/directories and set their storage policies according to the + * corresponding scheme. + */ + void prepareNamespace() throws Exception { + nsScheme.prepare(dfs, clusterScheme.repl); + } + + void setStoragePolicy() throws Exception { + nsScheme.setStoragePolicy(dfs); + } + + /** + * Run the migration tool. + */ + void migrate() throws Exception { + runMover(); + Thread.sleep(5000); // let the NN finish deletion + } + + /** + * Verify block locations after running the migration tool. 
+ */ + void verify(boolean verifyAll) throws Exception { + for (DataNode dn : cluster.getDataNodes()) { + DataNodeTestUtils.triggerBlockReport(dn); + } + if (verifyAll) { + verifyNamespace(); + } else { + // TODO verify according to the given path list + + } + } + + private void runMover() throws Exception { + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Map> nnMap = Maps.newHashMap(); + for (URI nn : namenodes) { + nnMap.put(nn, null); + } + int result = Mover.run(nnMap, conf); + Assert.assertEquals(ExitStatus.SUCCESS.getExitCode(), result); + } + + private void verifyNamespace() throws Exception { + HdfsFileStatus status = dfs.getClient().getFileInfo("/"); + verifyRecursively(null, status); + } + + private void verifyRecursively(final Path parent, + final HdfsFileStatus status) throws Exception { + if (status.isDir()) { + Path fullPath = parent == null ? + new Path("/") : status.getFullPath(parent); + DirectoryListing children = dfs.getClient().listPaths( + fullPath.toString(), HdfsFileStatus.EMPTY_NAME, true); + for (HdfsFileStatus child : children.getPartialListing()) { + verifyRecursively(fullPath, child); + } + } else if (!status.isSymlink()) { // is file + verifyFile(parent, status, null); + } + } + + void verifyFile(final Path file, final Byte expectedPolicyId) + throws Exception { + final Path parent = file.getParent(); + DirectoryListing children = dfs.getClient().listPaths( + parent.toString(), HdfsFileStatus.EMPTY_NAME, true); + for (HdfsFileStatus child : children.getPartialListing()) { + if (child.getLocalName().equals(file.getName())) { + verifyFile(parent, child, expectedPolicyId); + return; + } + } + Assert.fail("File " + file + " not found."); + } + + private void verifyFile(final Path parent, final HdfsFileStatus status, + final Byte expectedPolicyId) throws Exception { + HdfsLocatedFileStatus fileStatus = (HdfsLocatedFileStatus) status; + byte policyId = fileStatus.getStoragePolicy(); + BlockStoragePolicy policy = 
policies.getPolicy(policyId); + if (expectedPolicyId != null) { + Assert.assertEquals((byte)expectedPolicyId, policy.getId()); + } + final List types = policy.chooseStorageTypes( + status.getReplication()); + for(LocatedBlock lb : fileStatus.getBlockLocations().getLocatedBlocks()) { + final Mover.StorageTypeDiff diff = new Mover.StorageTypeDiff(types, + lb.getStorageTypes()); + Assert.assertTrue(fileStatus.getFullName(parent.toString()) + + " with policy " + policy + " has non-empty overlap: " + diff + + ", the corresponding block is " + lb.getBlock().getLocalBlock(), + diff.removeOverlap()); + } + } + + Replication getReplication(Path file) throws IOException { + return getOrVerifyReplication(file, null); + } + + Replication verifyReplication(Path file, int expectedDiskCount, + int expectedArchiveCount) throws IOException { + final Replication r = new Replication(); + r.disk = expectedDiskCount; + r.archive = expectedArchiveCount; + return getOrVerifyReplication(file, r); + } + + private Replication getOrVerifyReplication(Path file, Replication expected) + throws IOException { + final List lbs = dfs.getClient().getLocatedBlocks( + file.toString(), 0).getLocatedBlocks(); + Assert.assertEquals(1, lbs.size()); + + LocatedBlock lb = lbs.get(0); + StringBuilder types = new StringBuilder(); + final Replication r = new Replication(); + for(StorageType t : lb.getStorageTypes()) { + types.append(t).append(", "); + if (t == StorageType.DISK) { + r.disk++; + } else if (t == StorageType.ARCHIVE) { + r.archive++; + } else { + Assert.fail("Unexpected storage type " + t); + } + } + + if (expected != null) { + final String s = "file = " + file + "\n types = [" + types + "]"; + Assert.assertEquals(s, expected, r); + } + return r; + } + } + + static class Replication { + int disk; + int archive; + + @Override + public int hashCode() { + return disk ^ archive; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } else if (obj == null || 
!(obj instanceof Replication)) { + return false; + } + final Replication that = (Replication)obj; + return this.disk == that.disk && this.archive == that.archive; + } + + @Override + public String toString() { + return "[disk=" + disk + ", archive=" + archive + "]"; + } + } + + private static StorageType[][] genStorageTypes(int numDataNodes) { + return genStorageTypes(numDataNodes, 0, 0); + } + + private static StorageType[][] genStorageTypes(int numDataNodes, + int numAllDisk, int numAllArchive) { + StorageType[][] types = new StorageType[numDataNodes][]; + int i = 0; + for (; i < numAllDisk; i++) { + types[i] = new StorageType[]{StorageType.DISK, StorageType.DISK}; + } + for (; i < numAllDisk + numAllArchive; i++) { + types[i] = new StorageType[]{StorageType.ARCHIVE, StorageType.ARCHIVE}; + } + for (; i < types.length; i++) { + types[i] = new StorageType[]{StorageType.DISK, StorageType.ARCHIVE}; + } + return types; + } + + private static long[][] genCapacities(int nDatanodes, int numAllDisk, + int numAllArchive, long diskCapacity, long archiveCapacity) { + final long[][] capacities = new long[nDatanodes][]; + int i = 0; + for (; i < numAllDisk; i++) { + capacities[i] = new long[]{diskCapacity, diskCapacity}; + } + for (; i < numAllDisk + numAllArchive; i++) { + capacities[i] = new long[]{archiveCapacity, archiveCapacity}; + } + for(; i < capacities.length; i++) { + capacities[i] = new long[]{diskCapacity, archiveCapacity}; + } + return capacities; + } + + private static class PathPolicyMap { + final Map map = Maps.newHashMap(); + final Path hot = new Path("/hot"); + final Path warm = new Path("/warm"); + final Path cold = new Path("/cold"); + final List files; + + PathPolicyMap(int filesPerDir){ + map.put(hot, HOT); + map.put(warm, WARM); + map.put(cold, COLD); + files = new ArrayList(); + for(Path dir : map.keySet()) { + for(int i = 0; i < filesPerDir; i++) { + files.add(new Path(dir, "file" + i)); + } + } + } + + NamespaceScheme newNamespaceScheme() { + return 
new NamespaceScheme(Arrays.asList(hot, warm, cold), + files, BLOCK_SIZE/2, null, map); + } + + /** + * Move hot files to warm and cold, warm files to hot and cold, + * and cold files to hot and warm. + */ + void moveAround(DistributedFileSystem dfs) throws Exception { + for(Path srcDir : map.keySet()) { + int i = 0; + for(Path dstDir : map.keySet()) { + if (!srcDir.equals(dstDir)) { + final Path src = new Path(srcDir, "file" + i++); + final Path dst = new Path(dstDir, srcDir.getName() + "2" + dstDir.getName()); + LOG.info("rename " + src + " to " + dst); + dfs.rename(src, dst); + } + } + } + } + } + + /** + * A normal case for Mover: move a file into archival storage + */ + @Test + public void testMigrateFileToArchival() throws Exception { + LOG.info("testMigrateFileToArchival"); + final Path foo = new Path("/foo"); + Map policyMap = Maps.newHashMap(); + policyMap.put(foo, COLD); + NamespaceScheme nsScheme = new NamespaceScheme(null, Arrays.asList(foo), + 2*BLOCK_SIZE, null, policyMap); + ClusterScheme clusterScheme = new ClusterScheme(DEFAULT_CONF, + NUM_DATANODES, REPL, genStorageTypes(NUM_DATANODES), null); + new MigrationTest(clusterScheme, nsScheme).runBasicTest(true); + } + + /** + * Print a big banner in the test log to make debug easier. 
+ */ + static void banner(String string) { + LOG.info("\n\n\n\n================================================\n" + + string + "\n" + + "==================================================\n\n"); + } + + /** + * Move an open file into archival storage + */ + @Test + public void testMigrateOpenFileToArchival() throws Exception { + LOG.info("testMigrateOpenFileToArchival"); + final Path fooDir = new Path("/foo"); + Map policyMap = Maps.newHashMap(); + policyMap.put(fooDir, COLD); + NamespaceScheme nsScheme = new NamespaceScheme(Arrays.asList(fooDir), null, + BLOCK_SIZE, null, policyMap); + ClusterScheme clusterScheme = new ClusterScheme(DEFAULT_CONF, + NUM_DATANODES, REPL, genStorageTypes(NUM_DATANODES), null); + MigrationTest test = new MigrationTest(clusterScheme, nsScheme); + test.setupCluster(); + + // create an open file + banner("writing to file /foo/bar"); + final Path barFile = new Path(fooDir, "bar"); + DFSTestUtil.createFile(test.dfs, barFile, BLOCK_SIZE, (short) 1, 0L); + FSDataOutputStream out = test.dfs.append(barFile); + out.writeBytes("hello, "); + ((DFSOutputStream) out.getWrappedStream()).hsync(); + + try { + banner("start data migration"); + test.setStoragePolicy(); // set /foo to COLD + test.migrate(); + + // make sure the under construction block has not been migrated + LocatedBlocks lbs = test.dfs.getClient().getLocatedBlocks( + barFile.toString(), BLOCK_SIZE); + LOG.info("Locations: " + lbs); + List blks = lbs.getLocatedBlocks(); + Assert.assertEquals(1, blks.size()); + Assert.assertEquals(1, blks.get(0).getLocations().length); + + banner("finish the migration, continue writing"); + // make sure the writing can continue + out.writeBytes("world!"); + ((DFSOutputStream) out.getWrappedStream()).hsync(); + IOUtils.cleanup(LOG, out); + + lbs = test.dfs.getClient().getLocatedBlocks( + barFile.toString(), BLOCK_SIZE); + LOG.info("Locations: " + lbs); + blks = lbs.getLocatedBlocks(); + Assert.assertEquals(1, blks.size()); + Assert.assertEquals(1, 
blks.get(0).getLocations().length); + + banner("finish writing, starting reading"); + // check the content of /foo/bar + FSDataInputStream in = test.dfs.open(barFile); + byte[] buf = new byte[13]; + // read from offset 1024 + in.readFully(BLOCK_SIZE, buf, 0, buf.length); + IOUtils.cleanup(LOG, in); + Assert.assertEquals("hello, world!", new String(buf)); + } finally { + test.shutdownCluster(); + } + } + + /** + * Test directories with Hot, Warm and Cold polices. + */ + @Test + public void testHotWarmColdDirs() throws Exception { + LOG.info("testHotWarmColdDirs"); + PathPolicyMap pathPolicyMap = new PathPolicyMap(3); + NamespaceScheme nsScheme = pathPolicyMap.newNamespaceScheme(); + ClusterScheme clusterScheme = new ClusterScheme(); + MigrationTest test = new MigrationTest(clusterScheme, nsScheme); + + try { + test.runBasicTest(false); + pathPolicyMap.moveAround(test.dfs); + test.migrate(); + + test.verify(true); + } finally { + test.shutdownCluster(); + } + } + + private void waitForAllReplicas(int expectedReplicaNum, Path file, + DistributedFileSystem dfs) throws Exception { + for (int i = 0; i < 5; i++) { + LocatedBlocks lbs = dfs.getClient().getLocatedBlocks(file.toString(), 0, + BLOCK_SIZE); + LocatedBlock lb = lbs.get(0); + if (lb.getLocations().length >= expectedReplicaNum) { + return; + } else { + Thread.sleep(1000); + } + } + } + + private void setVolumeFull(DataNode dn, StorageType type) { + List volumes = dn.getFSDataset().getVolumes(); + for (int j = 0; j < volumes.size(); ++j) { + FsVolumeImpl volume = (FsVolumeImpl) volumes.get(j); + if (volume.getStorageType() == type) { + LOG.info("setCapacity to 0 for [" + volume.getStorageType() + "]" + + volume.getStorageID()); + volume.setCapacityForTesting(0); + } + } + } + + /** + * Test DISK is running out of spaces. 
+ */ + @Test + public void testNoSpaceDisk() throws Exception { + LOG.info("testNoSpaceDisk"); + final PathPolicyMap pathPolicyMap = new PathPolicyMap(0); + final NamespaceScheme nsScheme = pathPolicyMap.newNamespaceScheme(); + + Configuration conf = new Configuration(DEFAULT_CONF); + final ClusterScheme clusterScheme = new ClusterScheme(conf, + NUM_DATANODES, REPL, genStorageTypes(NUM_DATANODES), null); + final MigrationTest test = new MigrationTest(clusterScheme, nsScheme); + + try { + test.runBasicTest(false); + + // create 2 hot files with replication 3 + final short replication = 3; + for (int i = 0; i < 2; i++) { + final Path p = new Path(pathPolicyMap.hot, "file" + i); + DFSTestUtil.createFile(test.dfs, p, BLOCK_SIZE, replication, 0L); + waitForAllReplicas(replication, p, test.dfs); + } + + // set all the DISK volume to full + for (DataNode dn : test.cluster.getDataNodes()) { + setVolumeFull(dn, StorageType.DISK); + DataNodeTestUtils.triggerHeartbeat(dn); + } + + // test increasing replication. Since DISK is full, + // new replicas should be stored in ARCHIVE as a fallback storage. 
+ final Path file0 = new Path(pathPolicyMap.hot, "file0"); + final Replication r = test.getReplication(file0); + final short newReplication = (short) 5; + test.dfs.setReplication(file0, newReplication); + Thread.sleep(10000); + test.verifyReplication(file0, r.disk, newReplication - r.disk); + + // test creating a cold file and then increase replication + final Path p = new Path(pathPolicyMap.cold, "foo"); + DFSTestUtil.createFile(test.dfs, p, BLOCK_SIZE, replication, 0L); + test.verifyReplication(p, 0, replication); + + test.dfs.setReplication(p, newReplication); + Thread.sleep(10000); + test.verifyReplication(p, 0, newReplication); + + //test move a hot file to warm + final Path file1 = new Path(pathPolicyMap.hot, "file1"); + test.dfs.rename(file1, pathPolicyMap.warm); + test.migrate(); + test.verifyFile(new Path(pathPolicyMap.warm, "file1"), WARM.getId()); + } finally { + test.shutdownCluster(); + } + } + + /** + * Test ARCHIVE is running out of spaces. + */ + @Test + public void testNoSpaceArchive() throws Exception { + LOG.info("testNoSpaceArchive"); + final PathPolicyMap pathPolicyMap = new PathPolicyMap(0); + final NamespaceScheme nsScheme = pathPolicyMap.newNamespaceScheme(); + + final ClusterScheme clusterScheme = new ClusterScheme(DEFAULT_CONF, + NUM_DATANODES, REPL, genStorageTypes(NUM_DATANODES), null); + final MigrationTest test = new MigrationTest(clusterScheme, nsScheme); + + try { + test.runBasicTest(false); + + // create 2 hot files with replication 3 + final short replication = 3; + for (int i = 0; i < 2; i++) { + final Path p = new Path(pathPolicyMap.cold, "file" + i); + DFSTestUtil.createFile(test.dfs, p, BLOCK_SIZE, replication, 0L); + waitForAllReplicas(replication, p, test.dfs); + } + + // set all the ARCHIVE volume to full + for (DataNode dn : test.cluster.getDataNodes()) { + setVolumeFull(dn, StorageType.ARCHIVE); + DataNodeTestUtils.triggerHeartbeat(dn); + } + + { // test increasing replication but new replicas cannot be created + // since 
no more ARCHIVE space. + final Path file0 = new Path(pathPolicyMap.cold, "file0"); + final Replication r = test.getReplication(file0); + Assert.assertEquals(0, r.disk); + + final short newReplication = (short) 5; + test.dfs.setReplication(file0, newReplication); + Thread.sleep(10000); + + test.verifyReplication(file0, 0, r.archive); + } + + { // test creating a hot file + final Path p = new Path(pathPolicyMap.hot, "foo"); + DFSTestUtil.createFile(test.dfs, p, BLOCK_SIZE, (short) 3, 0L); + } + + { //test move a cold file to warm + final Path file1 = new Path(pathPolicyMap.cold, "file1"); + test.dfs.rename(file1, pathPolicyMap.warm); + test.migrate(); + test.verify(true); + } + } finally { + test.shutdownCluster(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java index 3f96c0c5ce001..94b139b261b69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java @@ -82,7 +82,7 @@ static void addFiles(FSEditLog editLog, int numFiles, short replication, } final INodeFile inode = new INodeFile(inodeId.nextValue(), null, - p, 0L, 0L, blocks, replication, blockSize); + p, 0L, 0L, blocks, replication, blockSize, (byte)0); inode.toUnderConstruction("", ""); // Append path to filename with information about blockIDs @@ -97,7 +97,7 @@ static void addFiles(FSEditLog editLog, int numFiles, short replication, editLog.logMkDir(currentDir, dirInode); } INodeFile fileUc = new INodeFile(inodeId.nextValue(), null, - p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize); + p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize, (byte)0); fileUc.toUnderConstruction("", ""); editLog.logOpenFile(filePath, fileUc, false, false); 
editLog.logCloseFile(filePath, inode); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java index 1ddc774c84217..adca0aaf61565 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSAclBaseTest.java @@ -39,7 +39,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.AclException; -import org.apache.hadoop.hdfs.protocol.FsAclPermission; +import org.apache.hadoop.hdfs.protocol.FsPermissionExtension; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; @@ -822,7 +822,8 @@ public void testSetPermissionCannotSetAclBit() throws IOException { fs.setPermission(path, FsPermission.createImmutable((short)0700)); assertPermission((short)0700); fs.setPermission(path, - new FsAclPermission(FsPermission.createImmutable((short)0755))); + new FsPermissionExtension(FsPermission. 
+ createImmutable((short)0755), true, true)); INode inode = cluster.getNamesystem().getFSDirectory().getNode( path.toUri().getPath(), false); assertNotNull(inode); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java index 9c48400624757..e21e34ca17363 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSXAttrBaseTest.java @@ -45,7 +45,9 @@ import static org.apache.hadoop.fs.permission.FsAction.ALL; import static org.apache.hadoop.fs.permission.FsAction.READ; import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry; +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import org.junit.After; @@ -64,14 +66,14 @@ */ public class FSXAttrBaseTest { - private static final int MAX_SIZE = 16; - protected static MiniDFSCluster dfsCluster; protected static Configuration conf; private static int pathCount = 0; protected static Path path; + protected static Path filePath; protected static Path rawPath; - + protected static Path rawFilePath; + // XAttrs protected static final String name1 = "user.a1"; protected static final byte[] value1 = {0x31, 0x32, 0x33}; @@ -82,6 +84,10 @@ public class FSXAttrBaseTest { protected static final String name4 = "user.a4"; protected static final String raw1 = "raw.a1"; protected static final String raw2 = "raw.a2"; + protected static final String security1 = + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; + + private static final int MAX_SIZE = security1.length(); protected 
FileSystem fs; @@ -111,7 +117,9 @@ public static void shutdown() { public void setUp() throws Exception { pathCount += 1; path = new Path("/p" + pathCount); + filePath = new Path(path, "file"); rawPath = new Path("/.reserved/raw/p" + pathCount); + rawFilePath = new Path(rawPath, "file"); initFileSystem(); } @@ -133,16 +141,17 @@ public void testCreateXAttr() throws Exception { Map expectedXAttrs = Maps.newHashMap(); expectedXAttrs.put(name1, value1); expectedXAttrs.put(name2, null); - doTestCreateXAttr(path, expectedXAttrs); + expectedXAttrs.put(security1, null); + doTestCreateXAttr(filePath, expectedXAttrs); expectedXAttrs.put(raw1, value1); - doTestCreateXAttr(rawPath, expectedXAttrs); + doTestCreateXAttr(rawFilePath, expectedXAttrs); } private void doTestCreateXAttr(Path usePath, Map expectedXAttrs) throws Exception { - FileSystem.mkdirs(fs, usePath, FsPermission.createImmutable((short)0750)); + DFSTestUtil.createFile(fs, usePath, 8192, (short) 1, 0xFEED); fs.setXAttr(usePath, name1, value1, EnumSet.of(XAttrSetFlag.CREATE)); - + Map xattrs = fs.getXAttrs(usePath); Assert.assertEquals(xattrs.size(), 1); Assert.assertArrayEquals(value1, xattrs.get(name1)); @@ -194,9 +203,7 @@ private void doTestCreateXAttr(Path usePath, Map ent : expectedXAttrs.entrySet()) { - fs.removeXAttr(usePath, ent.getKey()); - } + fs.delete(usePath, false); } /** @@ -344,13 +351,13 @@ public void testSetXAttr() throws Exception { fs.removeXAttr(path, name3); // Name length exceeds max limit - String longName = "user.0123456789abcdefX"; + String longName = "user.0123456789abcdefX0123456789abcdefX0123456789abcdef"; try { fs.setXAttr(path, longName, null); Assert.fail("Setting xattr should fail if name is too long."); } catch (IOException e) { GenericTestUtils.assertExceptionContains("XAttr is too big", e); - GenericTestUtils.assertExceptionContains("total size is 17", e); + GenericTestUtils.assertExceptionContains("total size is 50", e); } // Value length exceeds max limit @@ -360,7 +367,7 @@ 
public void testSetXAttr() throws Exception { Assert.fail("Setting xattr should fail if value is too long."); } catch (IOException e) { GenericTestUtils.assertExceptionContains("XAttr is too big", e); - GenericTestUtils.assertExceptionContains("total size is 17", e); + GenericTestUtils.assertExceptionContains("total size is 38", e); } // Name + value exactly equal the limit @@ -1116,6 +1123,121 @@ public Object run() throws Exception { } } + /** + * This tests the "unreadable by superuser" xattr which denies access to a + * file for the superuser. See HDFS-6705 for details. + */ + @Test(timeout = 120000) + public void testUnreadableBySuperuserXAttr() throws Exception { + // Run tests as superuser... + doTestUnreadableBySuperuserXAttr(fs, true); + + // ...and again as non-superuser + final UserGroupInformation user = UserGroupInformation. + createUserForTesting("user", new String[] { "mygroup" }); + user.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + final FileSystem userFs = dfsCluster.getFileSystem(); + doTestUnreadableBySuperuserXAttr(userFs, false); + return null; + } + }); + } + + private void doTestUnreadableBySuperuserXAttr(FileSystem userFs, + boolean expectOpenFailure) throws Exception { + + FileSystem.mkdirs(fs, path, FsPermission.createImmutable((short) 0777)); + DFSTestUtil.createFile(userFs, filePath, 8192, (short) 1, 0xFEED); + try { + doTUBSXAInt(userFs, expectOpenFailure); + // Deleting the file is allowed. 
+ userFs.delete(filePath, false); + } finally { + fs.delete(path, true); + } + } + + private void doTUBSXAInt(FileSystem userFs, boolean expectOpenFailure) + throws Exception { + + // Test that xattr can't be set on a dir + try { + userFs.setXAttr(path, security1, null, EnumSet.of(XAttrSetFlag.CREATE)); + } catch (IOException e) { + // WebHDFS throws IOException instead of RemoteException + GenericTestUtils.assertExceptionContains("Can only set '" + + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + "' on a file", e); + } + + // Test that xattr can actually be set. Repeatedly. + userFs.setXAttr(filePath, security1, null, + EnumSet.of(XAttrSetFlag.CREATE)); + verifySecurityXAttrExists(userFs); + userFs.setXAttr(filePath, security1, null, EnumSet.of(XAttrSetFlag.CREATE, + XAttrSetFlag.REPLACE)); + verifySecurityXAttrExists(userFs); + + // Test that the xattr can't be deleted by anyone. + try { + userFs.removeXAttr(filePath, security1); + Assert.fail("Removing security xattr should fail."); + } catch (AccessControlException e) { + GenericTestUtils.assertExceptionContains("The xattr '" + + SECURITY_XATTR_UNREADABLE_BY_SUPERUSER + "' can not be deleted.", e); + } + + // Test that xattr can be read. + verifySecurityXAttrExists(userFs); + + // Test that a value can't be set for the xattr. + try { + userFs.setXAttr(filePath, security1, + value1,EnumSet.of(XAttrSetFlag.REPLACE)); + fail("Should have thrown on attempt to set value"); + } catch (AccessControlException e) { + GenericTestUtils.assertExceptionContains("Values are not allowed", e); + } + + // Test that unreadable by superuser xattr appears in listXAttrs results + // (for superuser and non-superuser) + final List xattrNames = userFs.listXAttrs(filePath); + assertTrue(xattrNames.contains(security1)); + assertTrue(xattrNames.size() == 1); + + verifyFileAccess(userFs, expectOpenFailure); + + // Rename of the file is allowed by anyone. 
+ Path toPath = new Path(filePath.toString() + "x"); + userFs.rename(filePath, toPath); + userFs.rename(toPath, filePath); + } + + private void verifySecurityXAttrExists(FileSystem userFs) throws Exception { + try { + final Map xattrs = userFs.getXAttrs(filePath); + Assert.assertEquals(1, xattrs.size()); + Assert.assertNotNull(xattrs.get(security1)); + Assert.assertArrayEquals("expected empty byte[] from getXAttr", + new byte[0], userFs.getXAttr(filePath, security1)); + + } catch (AccessControlException e) { + fail("getXAttrs failed but expected it to succeed"); + } + } + + private void verifyFileAccess(FileSystem userFs, boolean expectOpenFailure) + throws Exception { + // Test that a file with the xattr can or can't be opened. + try { + userFs.open(filePath); + assertFalse("open succeeded but expected it to fail", expectOpenFailure); + } catch (AccessControlException e) { + assertTrue("open failed but expected it to succeed", expectOpenFailure); + } + } + /** * Creates a FileSystem for the super-user. 
* diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java index 5153e76f96587..cf37a54252d6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAddBlockRetry.java @@ -120,9 +120,9 @@ public DatanodeStorageInfo[] answer(InvocationOnMock invocation) } return ret; } - }).when(spyBM).chooseTarget(Mockito.anyString(), Mockito.anyInt(), + }).when(spyBM).chooseTarget4NewBlock(Mockito.anyString(), Mockito.anyInt(), Mockito.any(), Mockito.>any(), - Mockito.anyLong(), Mockito.>any()); + Mockito.anyLong(), Mockito.>any(), Mockito.anyByte()); // create file nn.create(src, FsPermission.getFileDefault(), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java index 4cdd8092d188b..b0f6b6ae0107a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeleteRace.java @@ -29,13 +29,13 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.BlockStoragePolicy; import org.apache.hadoop.hdfs.AppendTestUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.protocol.DatanodeID; 
import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; @@ -121,10 +121,10 @@ public DatanodeStorageInfo[] chooseTarget(String srcPath, boolean returnChosenNodes, Set excludedNodes, long blocksize, - StorageType storageType) { + final BlockStoragePolicy storagePolicy) { DatanodeStorageInfo[] results = super.chooseTarget(srcPath, numOfReplicas, writer, chosenNodes, returnChosenNodes, excludedNodes, - blocksize, storageType); + blocksize, storagePolicy); try { Thread.sleep(3000); } catch (InterruptedException e) {} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 7b622426aadf9..8070a5f49d4c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -194,7 +194,7 @@ public void run() { for (int i = 0; i < numTransactions; i++) { INodeFile inode = new INodeFile(namesystem.allocateNewInodeId(), null, - p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize); + p, 0L, 0L, BlockInfo.EMPTY_ARRAY, replication, blockSize, (byte)0); inode.toUnderConstruction("", ""); editLog.logOpenFile("/filename" + (startIndex + i), inode, false, false); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java index b1c5ca7c55725..9bee4a9bffb98 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSPermissionChecker.java @@ 
-17,32 +17,41 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.fs.permission.AclEntryScope.*; -import static org.apache.hadoop.fs.permission.AclEntryType.*; -import static org.apache.hadoop.fs.permission.FsAction.*; -import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.*; -import static org.junit.Assert.*; +import static org.apache.hadoop.fs.permission.AclEntryScope.ACCESS; +import static org.apache.hadoop.fs.permission.AclEntryScope.DEFAULT; +import static org.apache.hadoop.fs.permission.AclEntryType.GROUP; +import static org.apache.hadoop.fs.permission.AclEntryType.MASK; +import static org.apache.hadoop.fs.permission.AclEntryType.OTHER; +import static org.apache.hadoop.fs.permission.AclEntryType.USER; +import static org.apache.hadoop.fs.permission.FsAction.ALL; +import static org.apache.hadoop.fs.permission.FsAction.EXECUTE; +import static org.apache.hadoop.fs.permission.FsAction.NONE; +import static org.apache.hadoop.fs.permission.FsAction.READ; +import static org.apache.hadoop.fs.permission.FsAction.READ_EXECUTE; +import static org.apache.hadoop.fs.permission.FsAction.READ_WRITE; +import static org.apache.hadoop.fs.permission.FsAction.WRITE; +import static org.apache.hadoop.fs.permission.FsAction.WRITE_EXECUTE; +import static org.apache.hadoop.hdfs.server.namenode.AclTestHelpers.aclEntry; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; import java.io.IOException; import java.util.Arrays; import org.apache.hadoop.conf.Configuration; -import org.junit.Before; -import org.junit.Test; - import org.apache.hadoop.fs.permission.AclEntry; -import org.apache.hadoop.fs.permission.AclEntryScope; -import org.apache.hadoop.fs.permission.AclEntryType; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; 
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Before; +import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; - -import static org.mockito.Mockito.*; /** * Unit tests covering FSPermissionChecker. All tests in this suite have been * cross-validated against Linux setfacl/getfacl to check for consistency of the @@ -423,7 +432,7 @@ private static INodeFile createINodeFile(INodeDirectory parent, String name, FsPermission.createImmutable(perm)); INodeFile inodeFile = new INodeFile(INodeId.GRANDFATHER_INODE_ID, name.getBytes("UTF-8"), permStatus, 0L, 0L, null, REPLICATION, - PREFERRED_BLOCK_SIZE); + PREFERRED_BLOCK_SIZE, (byte)0); parent.addChild(inodeFile); return inodeFile; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index 4cddd60f3f2ba..8d298aeeadf2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -1015,10 +1015,11 @@ public void testFsckFileNotFound() throws Exception { path = DFSUtil.string2Bytes(pathString); long fileId = 312321L; int numChildren = 1; + byte storagePolicy = 0; HdfsFileStatus file = new HdfsFileStatus(length, isDir, blockReplication, blockSize, modTime, accessTime, perms, owner, group, symlink, path, - fileId, numChildren, null); + fileId, numChildren, null, storagePolicy); Result res = new Result(conf); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index a739b7aa6ed56..26d9a96c34949 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -75,15 +75,39 @@ public class TestINodeFile { static final short BLOCKBITS = 48; static final long BLKSIZE_MAXVALUE = ~(0xffffL << BLOCKBITS); - private final PermissionStatus perm = new PermissionStatus( + private static final PermissionStatus perm = new PermissionStatus( "userName", null, FsPermission.getDefault()); private short replication; private long preferredBlockSize = 1024; INodeFile createINodeFile(short replication, long preferredBlockSize) { return new INodeFile(INodeId.GRANDFATHER_INODE_ID, null, perm, 0L, 0L, - null, replication, preferredBlockSize); + null, replication, preferredBlockSize, (byte)0); } + + private static INodeFile createINodeFile(byte storagePolicyID) { + return new INodeFile(INodeId.GRANDFATHER_INODE_ID, null, perm, 0L, 0L, + null, (short)3, 1024L, storagePolicyID); + } + + @Test + public void testStoragePolicyID () { + for(byte i = 0; i < 16; i++) { + final INodeFile f = createINodeFile(i); + assertEquals(i, f.getStoragePolicyID()); + } + } + + @Test(expected=IllegalArgumentException.class) + public void testStoragePolicyIdBelowLowerBound () throws IllegalArgumentException { + createINodeFile((byte)-1); + } + + @Test(expected=IllegalArgumentException.class) + public void testStoragePolicyIdAboveUpperBound () throws IllegalArgumentException { + createINodeFile((byte)16); + } + /** * Test for the Replication value. Sets a value and checks if it was set * correct. 
@@ -259,7 +283,7 @@ private INodeFile[] createINodeFiles(int nCount, String fileNamePrefix) { INodeFile[] iNodes = new INodeFile[nCount]; for (int i = 0; i < nCount; i++) { iNodes[i] = new INodeFile(i, null, perm, 0L, 0L, null, replication, - preferredBlockSize); + preferredBlockSize, (byte)0); iNodes[i].setLocalName(DFSUtil.string2Bytes(fileNamePrefix + i)); BlockInfo newblock = new BlockInfo(replication); iNodes[i].addBlock(newblock); @@ -316,7 +340,8 @@ public void testValueOf () throws IOException { {//cast from INodeFileUnderConstruction final INode from = new INodeFile( - INodeId.GRANDFATHER_INODE_ID, null, perm, 0L, 0L, null, replication, 1024L); + INodeId.GRANDFATHER_INODE_ID, null, perm, 0L, 0L, null, replication, + 1024L, (byte)0); from.asFile().toUnderConstruction("client", "machine"); //cast to INodeFile, should success @@ -1079,7 +1104,7 @@ public void testFilesInGetListingOps() throws Exception { public void testFileUnderConstruction() { replication = 3; final INodeFile file = new INodeFile(INodeId.GRANDFATHER_INODE_ID, null, - perm, 0L, 0L, null, replication, 1024L); + perm, 0L, 0L, null, replication, 1024L, (byte)0); assertFalse(file.isUnderConstruction()); final String clientName = "client"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 6d4a4c84ccf3f..66f301b224da1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.StorageType; import org.apache.hadoop.hdfs.protocol.Block; import 
org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; @@ -588,7 +589,8 @@ public RandomDeleterPolicy() { public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection inode, Block block, short replicationFactor, Collection first, - Collection second) { + Collection second, + List excessTypes) { Collection chooseFrom = !first.isEmpty() ? first : second; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index 899b888f4423d..8f7d11ae7e628 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -194,7 +194,7 @@ private DFSClient genClientWithDummyHandler() throws IOException { URI nnUri = dfs.getUri(); FailoverProxyProvider failoverProxyProvider = NameNodeProxies.createFailoverProxyProvider(conf, - nnUri, ClientProtocol.class, true); + nnUri, ClientProtocol.class, true, null); InvocationHandler dummyHandler = new DummyRetryInvocationHandler( failoverProxyProvider, RetryPolicies .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java index b8150f7e357d9..3eba7db9c2cb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestJsonUtil.java @@ -64,7 +64,7 @@ public void testHdfsFileStatus() { final HdfsFileStatus status = new HdfsFileStatus(1001L, false, 3, 1L << 26, now, now + 10, new 
FsPermission((short) 0644), "user", "group", DFSUtil.string2Bytes("bar"), DFSUtil.string2Bytes("foo"), - INodeId.GRANDFATHER_INODE_ID, 0, null); + INodeId.GRANDFATHER_INODE_ID, 0, null, (byte) 0); final FileStatus fstatus = toFileStatus(status, parent); System.out.println("status = " + status); System.out.println("fstatus = " + fstatus); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java index faf946004ac64..1758807b84abe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java @@ -139,8 +139,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[2]; testNodes[2] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.setRandomSeed(0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[2]); @@ -152,8 +152,8 @@ public void testSortByDistance() throws Exception { dtestNodes[2] = dataNodes[11]; dtestNodes[3] = dataNodes[9]; dtestNodes[4] = dataNodes[10]; - cluster.sortByDistance(dataNodes[8], dtestNodes, - dtestNodes.length - 2, 0xDEADBEEF, false); + cluster.setRandomSeed(0xDEADBEEF); + cluster.sortByDistance(dataNodes[8], dtestNodes, dtestNodes.length - 2); assertTrue(dtestNodes[0] == dataNodes[8]); assertTrue(dtestNodes[1] == dataNodes[11]); assertTrue(dtestNodes[2] == dataNodes[12]); @@ -164,8 +164,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[0]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 
0xDEADBEEF, false); + cluster.setRandomSeed(0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[0]); assertTrue(testNodes[1] == dataNodes[1]); assertTrue(testNodes[2] == dataNodes[3]); @@ -174,8 +174,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[5]; testNodes[1] = dataNodes[3]; testNodes[2] = dataNodes[1]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.setRandomSeed(0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[2] == dataNodes[5]); @@ -184,8 +184,8 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[5]; testNodes[2] = dataNodes[3]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEADBEEF, false); + cluster.setRandomSeed(0xDEADBEEF); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[3]); assertTrue(testNodes[2] == dataNodes[5]); @@ -194,24 +194,23 @@ public void testSortByDistance() throws Exception { testNodes[0] = dataNodes[1]; testNodes[1] = dataNodes[5]; testNodes[2] = dataNodes[3]; - cluster.sortByDistance(dataNodes[0], testNodes, - testNodes.length, 0xDEAD, false); + cluster.setRandomSeed(0xDEAD); + cluster.sortByDistance(dataNodes[0], testNodes, testNodes.length); // sortByDistance does not take the "data center" layer into consideration // and it doesn't sort by getDistance, so 1, 5, 3 is also valid here assertTrue(testNodes[0] == dataNodes[1]); assertTrue(testNodes[1] == dataNodes[5]); assertTrue(testNodes[2] == dataNodes[3]); - // Array is just local rack nodes - // Expect a random first node depending on the seed (normally the block ID). 
+ // Array of just rack-local nodes + // Expect a random first node DatanodeDescriptor first = null; boolean foundRandom = false; for (int i=5; i<=7; i++) { testNodes[0] = dataNodes[5]; testNodes[1] = dataNodes[6]; testNodes[2] = dataNodes[7]; - cluster.sortByDistance(dataNodes[i], testNodes, - testNodes.length, 0xBEADED+i, false); + cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length); if (first == null) { first = testNodes[0]; } else { @@ -222,16 +221,15 @@ public void testSortByDistance() throws Exception { } } assertTrue("Expected to find a different first location", foundRandom); - // Array of rack local nodes with randomizeBlockLocationsPerBlock set to - // true - // Expect random order of block locations for same block + + // Array of just remote nodes + // Expect random first node first = null; for (int i = 1; i <= 4; i++) { testNodes[0] = dataNodes[13]; testNodes[1] = dataNodes[14]; testNodes[2] = dataNodes[15]; - cluster.sortByDistance(dataNodes[15 + i], testNodes, testNodes.length, - 0xBEADED, true); + cluster.sortByDistance(dataNodes[i], testNodes, testNodes.length); if (first == null) { first = testNodes[0]; } else { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java index b3e6ee8e3d60e..5c245009bcd3f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java @@ -53,13 +53,15 @@ public class TestTracing { private static Configuration conf; private static MiniDFSCluster cluster; private static DistributedFileSystem dfs; + private static SpanReceiverHost spanReceiverHost; @Test - public void testSpanReceiverHost() throws Exception { - Configuration conf = new Configuration(); - conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY, - SetSpanReceiver.class.getName()); - 
SpanReceiverHost spanReceiverHost = SpanReceiverHost.getInstance(conf); + public void testGetSpanReceiverHost() throws Exception { + Configuration c = new Configuration(); + // getting instance already loaded. + c.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY, ""); + SpanReceiverHost s = SpanReceiverHost.getInstance(c); + Assert.assertEquals(spanReceiverHost, s); } @Test @@ -228,8 +230,10 @@ public static void setupCluster() throws IOException { cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(3) .build(); + cluster.waitActive(); dfs = cluster.getFileSystem(); + spanReceiverHost = SpanReceiverHost.getInstance(conf); } @AfterClass diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java index 7fe8a1eab1e42..800cc6bb82047 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.tracing; +import static org.junit.Assume.assumeTrue; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -27,6 +29,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.apache.hadoop.util.NativeCodeLoader; import org.htrace.Sampler; import org.htrace.Span; import org.htrace.Trace; @@ -59,6 +62,7 @@ public static void shutdown() throws IOException { @Test public void testShortCircuitTraceHooks() throws IOException { + assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS); conf = new Configuration(); conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY, 
TestTracing.SetSpanReceiver.class.getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index 754f690a4c858..ecfbb9f5fca4a 100644 Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored differ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index 7cfb689466b6b..8cafa9f2fa028 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,6 +1,6 @@ - -59 + -60 OP_START_LOG_SEGMENT @@ -13,8 +13,8 @@ 2 1 - 1403590428625 - 16f34bfba67b2552 + 1410915997709 + 309e81e09dc6c75a @@ -24,8 +24,8 @@ 3 2 - 1403590428631 - dbe6282854469833 + 1410915997711 + 8a2399843e754bee @@ -37,19 +37,19 @@ 16386 /file_create 1 - 1402899229669 - 1402899229669 + 1410224798292 + 1410224798292 512 - DFSClient_NONMAPREDUCE_1233039831_1 + DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 - false + true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 8 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 5 @@ -60,52 +60,60 @@ 0 /file_create 1 - 1402899229711 - 1402899229669 + 1410224798315 + 1410224798292 512 false - andrew + jing supergroup 420 - OP_RENAME_OLD + OP_SET_STORAGE_POLICY 6 + /file_create + 12 + + + + OP_RENAME_OLD + + 7 0 /file_create /file_moved - 1402899229718 - e03f4a52-3d85-4e05-8942-286185e639bd - 10 + 1410224798322 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 8 OP_DELETE - 7 + 8 0 /file_moved - 1402899229730 - e03f4a52-3d85-4e05-8942-286185e639bd - 11 + 1410224798328 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 9 OP_MKDIR - 8 + 9 0 16387 /directory_mkdir - 1402899229748 + 1410224798335 - andrew + jing supergroup 493 @@ -114,138 +122,94 @@ OP_ALLOW_SNAPSHOT - 9 + 10 /directory_mkdir 
OP_DISALLOW_SNAPSHOT - 10 + 11 /directory_mkdir OP_ALLOW_SNAPSHOT - 11 + 12 /directory_mkdir OP_CREATE_SNAPSHOT - 12 + 13 /directory_mkdir snapshot1 - e03f4a52-3d85-4e05-8942-286185e639bd - 16 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 14 OP_RENAME_SNAPSHOT - 13 + 14 /directory_mkdir snapshot1 snapshot2 - e03f4a52-3d85-4e05-8942-286185e639bd - 17 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 15 OP_DELETE_SNAPSHOT - 14 + 15 /directory_mkdir snapshot2 - e03f4a52-3d85-4e05-8942-286185e639bd - 18 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 16 OP_ADD - - 15 - 0 - 16388 - /file_create - 1 - 1402899229871 - 1402899229871 - 512 - DFSClient_NONMAPREDUCE_1233039831_1 - 127.0.0.1 - false - - andrew - supergroup - 420 - - e03f4a52-3d85-4e05-8942-286185e639bd - 19 - - - - OP_CLOSE 16 0 - 0 - /file_create - 1 - 1402899229881 - 1402899229871 - 512 - - - false - - andrew - supergroup - 420 - - - - - OP_ADD - - 17 - 0 16388 /file_create 1 - 1402899229912 - 1402899229912 + 1410224798359 + 1410224798359 512 - DFSClient_NONMAPREDUCE_1233039831_1 + DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 21 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 17 OP_CLOSE - 18 + 17 0 0 /file_create 1 - 1402899229931 - 1402899229912 + 1410224798361 + 1410224798359 512 false - andrew + jing supergroup 420 @@ -254,7 +218,7 @@ OP_SET_REPLICATION - 19 + 18 /file_create 1 @@ -262,7 +226,7 @@ OP_SET_PERMISSIONS - 20 + 19 /file_create 511 @@ -270,7 +234,7 @@ OP_SET_OWNER - 21 + 20 /file_create newOwner @@ -278,7 +242,7 @@ OP_TIMES - 22 + 21 0 /file_create 1285195527000 @@ -288,7 +252,7 @@ OP_SET_QUOTA - 23 + 22 /directory_mkdir 1000 -1 @@ -297,57 +261,57 @@ OP_RENAME - 24 + 23 0 /file_create /file_moved - 1402899229963 + 1410224798379 NONE - e03f4a52-3d85-4e05-8942-286185e639bd - 26 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 24 OP_ADD - 25 + 24 0 16389 /file_concat_target 1 - 1402899229981 - 1402899229981 + 1410224798382 + 1410224798382 512 - 
DFSClient_NONMAPREDUCE_1233039831_1 + DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 - false + true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 28 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 26 OP_ALLOCATE_BLOCK_ID - 26 + 25 1073741825 OP_SET_GENSTAMP_V2 - 27 + 26 1001 OP_ADD_BLOCK - 28 + 27 /file_concat_target 1073741825 @@ -361,21 +325,21 @@ OP_ALLOCATE_BLOCK_ID - 29 + 28 1073741826 OP_SET_GENSTAMP_V2 - 30 + 29 1002 OP_ADD_BLOCK - 31 + 30 /file_concat_target 1073741825 @@ -394,21 +358,21 @@ OP_ALLOCATE_BLOCK_ID - 32 + 31 1073741827 OP_SET_GENSTAMP_V2 - 33 + 32 1003 OP_ADD_BLOCK - 34 + 33 /file_concat_target 1073741826 @@ -427,13 +391,13 @@ OP_CLOSE - 35 + 34 0 0 /file_concat_target 1 - 1402899230219 - 1402899229981 + 1410224798476 + 1410224798382 512 @@ -454,7 +418,7 @@ 1003 - andrew + jing supergroup 420 @@ -463,44 +427,44 @@ OP_ADD - 36 + 35 0 16390 /file_concat_0 1 - 1402899230235 - 1402899230235 + 1410224798479 + 1410224798479 512 - DFSClient_NONMAPREDUCE_1233039831_1 + DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 - false + true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 41 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 39 OP_ALLOCATE_BLOCK_ID - 37 + 36 1073741828 OP_SET_GENSTAMP_V2 - 38 + 37 1004 OP_ADD_BLOCK - 39 + 38 /file_concat_0 1073741828 @@ -514,21 +478,21 @@ OP_ALLOCATE_BLOCK_ID - 40 + 39 1073741829 OP_SET_GENSTAMP_V2 - 41 + 40 1005 OP_ADD_BLOCK - 42 + 41 /file_concat_0 1073741828 @@ -547,21 +511,21 @@ OP_ALLOCATE_BLOCK_ID - 43 + 42 1073741830 OP_SET_GENSTAMP_V2 - 44 + 43 1006 OP_ADD_BLOCK - 45 + 44 /file_concat_0 1073741829 @@ -580,13 +544,13 @@ OP_CLOSE - 46 + 45 0 0 /file_concat_0 1 - 1402899230307 - 1402899230235 + 1410224798501 + 1410224798479 512 @@ -607,7 +571,7 @@ 1006 - andrew + jing supergroup 420 @@ -616,44 +580,44 @@ OP_ADD - 47 + 46 0 16391 /file_concat_1 1 - 1402899230320 - 1402899230320 + 1410224798504 + 1410224798504 512 - DFSClient_NONMAPREDUCE_1233039831_1 + 
DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 - false + true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 53 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 51 OP_ALLOCATE_BLOCK_ID - 48 + 47 1073741831 OP_SET_GENSTAMP_V2 - 49 + 48 1007 OP_ADD_BLOCK - 50 + 49 /file_concat_1 1073741831 @@ -667,21 +631,21 @@ OP_ALLOCATE_BLOCK_ID - 51 + 50 1073741832 OP_SET_GENSTAMP_V2 - 52 + 51 1008 OP_ADD_BLOCK - 53 + 52 /file_concat_1 1073741831 @@ -700,21 +664,21 @@ OP_ALLOCATE_BLOCK_ID - 54 + 53 1073741833 OP_SET_GENSTAMP_V2 - 55 + 54 1009 OP_ADD_BLOCK - 56 + 55 /file_concat_1 1073741832 @@ -733,13 +697,13 @@ OP_CLOSE - 57 + 56 0 0 /file_concat_1 1 - 1402899230383 - 1402899230320 + 1410224798530 + 1410224798504 512 @@ -760,7 +724,7 @@ 1009 - andrew + jing supergroup 420 @@ -769,78 +733,78 @@ OP_CONCAT_DELETE - 58 + 57 0 /file_concat_target - 1402899230394 + 1410224798533 /file_concat_0 /file_concat_1 - e03f4a52-3d85-4e05-8942-286185e639bd - 64 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 62 OP_SYMLINK - 59 + 58 0 16392 /file_symlink /file_concat_target - 1402899230406 - 1402899230406 + 1410224798537 + 1410224798537 - andrew + jing supergroup 511 - e03f4a52-3d85-4e05-8942-286185e639bd - 65 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 63 OP_ADD - 60 + 59 0 16393 /hard-lease-recovery-test 1 - 1402899230413 - 1402899230413 + 1410224798540 + 1410224798540 512 - DFSClient_NONMAPREDUCE_1233039831_1 + DFSClient_NONMAPREDUCE_1374813776_1 127.0.0.1 - false + true - andrew + jing supergroup 420 - e03f4a52-3d85-4e05-8942-286185e639bd - 66 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 64 OP_ALLOCATE_BLOCK_ID - 61 + 60 1073741834 OP_SET_GENSTAMP_V2 - 62 + 61 1010 OP_ADD_BLOCK - 63 + 62 /hard-lease-recovery-test 1073741834 @@ -854,7 +818,7 @@ OP_UPDATE_BLOCKS - 64 + 63 /hard-lease-recovery-test 1073741834 @@ -868,15 +832,15 @@ OP_SET_GENSTAMP_V2 - 65 + 64 1011 OP_REASSIGN_LEASE - 66 - DFSClient_NONMAPREDUCE_1233039831_1 + 65 + DFSClient_NONMAPREDUCE_1374813776_1 /hard-lease-recovery-test 
HDFS_NameNode @@ -884,13 +848,13 @@ OP_CLOSE - 67 + 66 0 0 /hard-lease-recovery-test 1 - 1402899232526 - 1402899230413 + 1410224801265 + 1410224798540 512 @@ -901,7 +865,7 @@ 1011 - andrew + jing supergroup 420 @@ -910,72 +874,72 @@ OP_ADD_CACHE_POOL - 68 + 67 pool1 - andrew - andrew + jing + staff 493 9223372036854775807 2305843009213693951 - e03f4a52-3d85-4e05-8942-286185e639bd - 73 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 71 OP_MODIFY_CACHE_POOL - 69 + 68 pool1 99 - e03f4a52-3d85-4e05-8942-286185e639bd - 74 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 72 OP_ADD_CACHE_DIRECTIVE - 70 + 69 1 /path 1 pool1 - 2305844412112927450 - e03f4a52-3d85-4e05-8942-286185e639bd - 75 + 2305844419438495525 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 73 OP_MODIFY_CACHE_DIRECTIVE - 71 + 70 1 2 - e03f4a52-3d85-4e05-8942-286185e639bd - 76 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 74 OP_REMOVE_CACHE_DIRECTIVE - 72 + 71 1 - e03f4a52-3d85-4e05-8942-286185e639bd - 77 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 75 OP_REMOVE_CACHE_POOL - 73 + 72 pool1 - e03f4a52-3d85-4e05-8942-286185e639bd - 78 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 76 OP_SET_ACL - 74 + 73 /file_concat_target ACCESS @@ -1008,62 +972,62 @@ OP_SET_XATTR - 75 + 74 /file_concat_target USER a1 0x313233 - e03f4a52-3d85-4e05-8942-286185e639bd - 80 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 78 OP_SET_XATTR - 76 + 75 /file_concat_target USER a2 0x373839 - e03f4a52-3d85-4e05-8942-286185e639bd - 81 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 79 OP_REMOVE_XATTR - 77 + 76 /file_concat_target USER a2 - e03f4a52-3d85-4e05-8942-286185e639bd - 82 + b53e8d0a-8d92-4067-b8c8-637ac951bac2 + 80 OP_ROLLING_UPGRADE_START - 78 - 1402899233646 + 77 + 1410224801616 OP_ROLLING_UPGRADE_FINALIZE - 79 - 1402899233647 + 78 + 1410224801616 OP_END_LOG_SEGMENT - 80 + 79 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testXAttrConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testXAttrConf.xml index 3414f5719dd9c..9c66cba848736 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testXAttrConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testXAttrConf.xml @@ -124,6 +124,79 @@ + + + setfattr : Add the unreadable by superuser xattr to security namespace + + -fs NAMENODE -touchz /file1 + -fs NAMENODE -setfattr -n security.hdfs.unreadable.by.superuser /file1 + -fs NAMENODE -getfattr -d /file1 + + + -fs NAMENODE -rm /file1 + + + + SubstringComparator + security.hdfs.unreadable.by.superuser + + + + + + setfattr : Try to delete the unreadable by superuser xattr from security namespace + + -fs NAMENODE -touchz /file1 + -fs NAMENODE -setfattr -n security.hdfs.unreadable.by.superuser /file1 + -fs NAMENODE -setfattr -x security.hdfs.unreadable.by.superuser /file1 + + + -fs NAMENODE -rm /file1 + + + + SubstringComparator + can not be deleted + + + + + + setfattr : Try to read a file protected by the unreadable by superuser xattr + + -fs NAMENODE -touchz /file1 + -fs NAMENODE -setfattr -n security.hdfs.unreadable.by.superuser /file1 + -fs NAMENODE -get /file1 /tmp/file1 + + + -fs NAMENODE -rm /file1 + rm /tmp/file1 + + + + SubstringComparator + Access is denied + + + + + + setfattr : Try to add a value to the unreadable by superuser xattr + + -fs NAMENODE -touchz /file1 + -fs NAMENODE -setfattr -n security.hdfs.unreadable.by.superuser /file1 + -fs NAMENODE -setfattr -n security.hdfs.unreadable.by.superuser -v 1234 /file1 + + + -fs NAMENODE -rm /file1 + + + + SubstringComparator + Values are not allowed + + + setfattr : Add an xattr of raw namespace diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 5d1e5f53babb6..859c8998a6600 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -251,6 +251,13 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-5130. Add missing job config options to mapred-default.xml (Ray Chiang via Sandy Ryza) + MAPREDUCE-5891. 
Improved shuffle error handling across NM restarts + (Junping Du via jlowe) + + MAPREDUCE-5279. Made MR headroom calculation honor cpu dimension when YARN + scheduler resource type is memory plus cpu. (Peng Zhang and Varun Vasudev + via zjshen) + OPTIMIZATIONS BUG FIXES @@ -347,6 +354,19 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6070. yarn.app.am.resource.mb/cpu-vcores affects uber mode but is not documented (Tsuyoshi OZAWA via jlowe) + MAPREDUCE-6090. mapred hsadmin getGroups fails to connect in some cases + (Robert Kanter via jlowe) + + MAPREDUCE-6086. mapreduce.job.credentials.binary should allow all URIs. + (Zhihai Xu via kasha) + + MAPREDUCE-6091. YARNRunner.getJobStatus() fails with + ApplicationNotFoundException if the job rolled off the RM view (Sangjin + Lee via jlowe) + + MAPREDUCE-6095. Enable DistributedCache for uber-mode Jobs (Gera Shegalov + via jlowe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 4ba1991ed9bac..92bbc4a773870 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -23,15 +23,11 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; -import java.net.URI; import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.List; import java.util.concurrent.ScheduledExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSError; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; @@ -43,7 +39,6 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.counters.Limits; -import org.apache.hadoop.mapreduce.filecache.DistributedCache; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; @@ -307,7 +302,7 @@ private static void configureTask(JobConf job, Task task, task.localizeConfiguration(job); // Set up the DistributedCache related configs - setupDistributedCacheConfig(job); + MRApps.setupDistributedCacheLocal(job); // Overwrite the localized task jobconf which is linked to in the current // work-dir. @@ -317,62 +312,6 @@ private static void configureTask(JobConf job, Task task, task.setConf(job); } - /** - * Set up the DistributedCache related configs to make - * {@link DistributedCache#getLocalCacheFiles(Configuration)} - * and - * {@link DistributedCache#getLocalCacheArchives(Configuration)} - * working. - * @param job - * @throws IOException - */ - private static void setupDistributedCacheConfig(final JobConf job) - throws IOException { - - String localWorkDir = System.getenv("PWD"); - // ^ ^ all symlinks are created in the current work-dir - - // Update the configuration object with localized archives. - URI[] cacheArchives = DistributedCache.getCacheArchives(job); - if (cacheArchives != null) { - List localArchives = new ArrayList(); - for (int i = 0; i < cacheArchives.length; ++i) { - URI u = cacheArchives[i]; - Path p = new Path(u); - Path name = - new Path((null == u.getFragment()) ? 
p.getName() - : u.getFragment()); - String linkName = name.toUri().getPath(); - localArchives.add(new Path(localWorkDir, linkName).toUri().getPath()); - } - if (!localArchives.isEmpty()) { - job.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils - .arrayToString(localArchives.toArray(new String[localArchives - .size()]))); - } - } - - // Update the configuration object with localized files. - URI[] cacheFiles = DistributedCache.getCacheFiles(job); - if (cacheFiles != null) { - List localFiles = new ArrayList(); - for (int i = 0; i < cacheFiles.length; ++i) { - URI u = cacheFiles[i]; - Path p = new Path(u); - Path name = - new Path((null == u.getFragment()) ? p.getName() - : u.getFragment()); - String linkName = name.toUri().getPath(); - localFiles.add(new Path(localWorkDir, linkName).toUri().getPath()); - } - if (!localFiles.isEmpty()) { - job.set(MRJobConfig.CACHE_LOCALFILES, - StringUtils.arrayToString(localFiles - .toArray(new String[localFiles.size()]))); - } - } - } - private static final FsPermission urw_gr = FsPermission.createImmutable((short) 0640); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 59e724904960c..1cf8b29605f15 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -826,6 +826,7 @@ private final class ContainerAllocatorRouter extends AbstractService @Override protected void serviceStart() throws Exception { if (job.isUber()) { + MRApps.setupDistributedCacheLocal(getConfig()); this.containerAllocator = new LocalContainerAllocator( 
this.clientService, this.context, nmHost, nmPort, nmHttpPort , containerID); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index 6e9f3138b4ab4..6c58a683d1f27 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; +import java.util.EnumSet; import java.util.Map; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicBoolean; @@ -59,6 +60,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; /** * Registers/unregisters to RM and sends heartbeats to RM. 
@@ -90,6 +92,8 @@ public abstract class RMCommunicator extends AbstractService private volatile boolean shouldUnregister = true; private boolean isApplicationMasterRegistered = false; + private EnumSet schedulerResourceTypes; + public RMCommunicator(ClientService clientService, AppContext context) { super("RMCommunicator"); this.clientService = clientService; @@ -98,6 +102,7 @@ public RMCommunicator(ClientService clientService, AppContext context) { this.applicationId = context.getApplicationID(); this.stopped = new AtomicBoolean(false); this.heartbeatCallbacks = new ConcurrentLinkedQueue(); + this.schedulerResourceTypes = EnumSet.of(SchedulerResourceTypes.MEMORY); } @Override @@ -163,10 +168,11 @@ protected void register() { setClientToAMToken(response.getClientToAMTokenMasterKey()); } this.applicationACLs = response.getApplicationACLs(); - LOG.info("maxContainerCapability: " + maxContainerCapability.getMemory()); + LOG.info("maxContainerCapability: " + maxContainerCapability); String queue = response.getQueue(); LOG.info("queue: " + queue); job.setQueueName(queue); + this.schedulerResourceTypes.addAll(response.getSchedulerResourceTypes()); } catch (Exception are) { LOG.error("Exception while registering", are); throw new YarnRuntimeException(are); @@ -343,4 +349,8 @@ public void setSignalled(boolean isSignalled) { protected boolean isApplicationMasterRegistered() { return isApplicationMasterRegistered; } + + public EnumSet getSchedulerResourceTypes() { + return schedulerResourceTypes; + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index 6cb019181805c..fb8771af6a72e 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -73,6 +73,7 @@ import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.PreemptionMessage; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.client.api.NMTokenCache; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -80,6 +81,7 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.RackResolver; +import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.annotations.VisibleForTesting; @@ -149,8 +151,8 @@ added to the pending and are ramped up (added to scheduled) based private int lastCompletedTasks = 0; private boolean recalculateReduceSchedule = false; - private int mapResourceRequest;//memory - private int reduceResourceRequest;//memory + private Resource mapResourceRequest = Resources.none(); + private Resource reduceResourceRequest = Resources.none(); private boolean reduceStarted = false; private float maxReduceRampupLimit = 0; @@ -328,49 +330,61 @@ protected synchronized void handleEvent(ContainerAllocatorEvent event) { if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) { ContainerRequestEvent reqEvent = (ContainerRequestEvent) event; JobId jobId = getJob().getID(); - int supportedMaxContainerCapability = - getMaxContainerCapability().getMemory(); + Resource supportedMaxContainerCapability = getMaxContainerCapability(); if (reqEvent.getAttemptID().getTaskId().getTaskType().equals(TaskType.MAP)) { - if (mapResourceRequest == 0) { - 
mapResourceRequest = reqEvent.getCapability().getMemory(); - eventHandler.handle(new JobHistoryEvent(jobId, - new NormalizedResourceEvent(org.apache.hadoop.mapreduce.TaskType.MAP, - mapResourceRequest))); - LOG.info("mapResourceRequest:"+ mapResourceRequest); - if (mapResourceRequest > supportedMaxContainerCapability) { - String diagMsg = "MAP capability required is more than the supported " + - "max container capability in the cluster. Killing the Job. mapResourceRequest: " + - mapResourceRequest + " maxContainerCapability:" + supportedMaxContainerCapability; + if (mapResourceRequest.equals(Resources.none())) { + mapResourceRequest = reqEvent.getCapability(); + eventHandler.handle(new JobHistoryEvent(jobId, + new NormalizedResourceEvent( + org.apache.hadoop.mapreduce.TaskType.MAP, mapResourceRequest + .getMemory()))); + LOG.info("mapResourceRequest:" + mapResourceRequest); + if (mapResourceRequest.getMemory() > supportedMaxContainerCapability + .getMemory() + || mapResourceRequest.getVirtualCores() > supportedMaxContainerCapability + .getVirtualCores()) { + String diagMsg = + "MAP capability required is more than the supported " + + "max container capability in the cluster. Killing the Job. 
mapResourceRequest: " + + mapResourceRequest + " maxContainerCapability:" + + supportedMaxContainerCapability; LOG.info(diagMsg); - eventHandler.handle(new JobDiagnosticsUpdateEvent( - jobId, diagMsg)); + eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg)); eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); } } - //set the rounded off memory - reqEvent.getCapability().setMemory(mapResourceRequest); + // set the resources + reqEvent.getCapability().setMemory(mapResourceRequest.getMemory()); + reqEvent.getCapability().setVirtualCores( + mapResourceRequest.getVirtualCores()); scheduledRequests.addMap(reqEvent);//maps are immediately scheduled } else { - if (reduceResourceRequest == 0) { - reduceResourceRequest = reqEvent.getCapability().getMemory(); - eventHandler.handle(new JobHistoryEvent(jobId, - new NormalizedResourceEvent( - org.apache.hadoop.mapreduce.TaskType.REDUCE, - reduceResourceRequest))); - LOG.info("reduceResourceRequest:"+ reduceResourceRequest); - if (reduceResourceRequest > supportedMaxContainerCapability) { - String diagMsg = "REDUCE capability required is more than the " + - "supported max container capability in the cluster. Killing the " + - "Job. reduceResourceRequest: " + reduceResourceRequest + - " maxContainerCapability:" + supportedMaxContainerCapability; + if (reduceResourceRequest.equals(Resources.none())) { + reduceResourceRequest = reqEvent.getCapability(); + eventHandler.handle(new JobHistoryEvent(jobId, + new NormalizedResourceEvent( + org.apache.hadoop.mapreduce.TaskType.REDUCE, + reduceResourceRequest.getMemory()))); + LOG.info("reduceResourceRequest:" + reduceResourceRequest); + if (reduceResourceRequest.getMemory() > supportedMaxContainerCapability + .getMemory() + || reduceResourceRequest.getVirtualCores() > supportedMaxContainerCapability + .getVirtualCores()) { + String diagMsg = + "REDUCE capability required is more than the " + + "supported max container capability in the cluster. 
Killing the " + + "Job. reduceResourceRequest: " + reduceResourceRequest + + " maxContainerCapability:" + + supportedMaxContainerCapability; LOG.info(diagMsg); - eventHandler.handle(new JobDiagnosticsUpdateEvent( - jobId, diagMsg)); + eventHandler.handle(new JobDiagnosticsUpdateEvent(jobId, diagMsg)); eventHandler.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); } } - //set the rounded off memory - reqEvent.getCapability().setMemory(reduceResourceRequest); + // set the resources + reqEvent.getCapability().setMemory(reduceResourceRequest.getMemory()); + reqEvent.getCapability().setVirtualCores( + reduceResourceRequest.getVirtualCores()); if (reqEvent.getEarlierAttemptFailed()) { //add to the front of queue for fail fast pendingReduces.addFirst(new ContainerRequest(reqEvent, PRIORITY_REDUCE)); @@ -425,34 +439,40 @@ private static String getHost(String contMgrAddress) { @Private @VisibleForTesting - synchronized void setReduceResourceRequest(int mem) { - this.reduceResourceRequest = mem; + synchronized void setReduceResourceRequest(Resource res) { + this.reduceResourceRequest = res; } @Private @VisibleForTesting - synchronized void setMapResourceRequest(int mem) { - this.mapResourceRequest = mem; + synchronized void setMapResourceRequest(Resource res) { + this.mapResourceRequest = res; } @Private @VisibleForTesting void preemptReducesIfNeeded() { - if (reduceResourceRequest == 0) { - return; //no reduces + if (reduceResourceRequest.equals(Resources.none())) { + return; // no reduces } //check if reduces have taken over the whole cluster and there are //unassigned maps if (scheduledRequests.maps.size() > 0) { - int memLimit = getMemLimit(); - int availableMemForMap = memLimit - ((assignedRequests.reduces.size() - - assignedRequests.preemptionWaitingReduces.size()) * reduceResourceRequest); - //availableMemForMap must be sufficient to run atleast 1 map - if (availableMemForMap < mapResourceRequest) { - //to make sure new containers are given to maps and not reduces - 
//ramp down all scheduled reduces if any - //(since reduces are scheduled at higher priority than maps) - LOG.info("Ramping down all scheduled reduces:" + scheduledRequests.reduces.size()); + Resource resourceLimit = getResourceLimit(); + Resource availableResourceForMap = + Resources.subtract( + resourceLimit, + Resources.multiply(reduceResourceRequest, + assignedRequests.reduces.size() + - assignedRequests.preemptionWaitingReduces.size())); + // availableMemForMap must be sufficient to run at least 1 map + if (ResourceCalculatorUtils.computeAvailableContainers(availableResourceForMap, + mapResourceRequest, getSchedulerResourceTypes()) <= 0) { + // to make sure new containers are given to maps and not reduces + // ramp down all scheduled reduces if any + // (since reduces are scheduled at higher priority than maps) + LOG.info("Ramping down all scheduled reduces:" + + scheduledRequests.reduces.size()); for (ContainerRequest req : scheduledRequests.reduces.values()) { pendingReduces.add(req); } @@ -462,17 +482,25 @@ void preemptReducesIfNeeded() { //hanging around for a while int hangingMapRequests = getNumOfHangingRequests(scheduledRequests.maps); if (hangingMapRequests > 0) { - //preempt for making space for at least one map - int premeptionLimit = Math.max(mapResourceRequest, - (int) (maxReducePreemptionLimit * memLimit)); - - int preemptMem = Math.min(hangingMapRequests * mapResourceRequest, - premeptionLimit); - - int toPreempt = (int) Math.ceil((float) preemptMem / reduceResourceRequest); - toPreempt = Math.min(toPreempt, assignedRequests.reduces.size()); - - LOG.info("Going to preempt " + toPreempt + " due to lack of space for maps"); + // preempt for making space for at least one map + int preemptionReduceNumForOneMap = + ResourceCalculatorUtils.divideAndCeilContainers(mapResourceRequest, + reduceResourceRequest, getSchedulerResourceTypes()); + int preemptionReduceNumForPreemptionLimit = + ResourceCalculatorUtils.divideAndCeilContainers( + 
Resources.multiply(resourceLimit, maxReducePreemptionLimit), + reduceResourceRequest, getSchedulerResourceTypes()); + int preemptionReduceNumForAllMaps = + ResourceCalculatorUtils.divideAndCeilContainers( + Resources.multiply(mapResourceRequest, hangingMapRequests), + reduceResourceRequest, getSchedulerResourceTypes()); + int toPreempt = + Math.min(Math.max(preemptionReduceNumForOneMap, + preemptionReduceNumForPreemptionLimit), + preemptionReduceNumForAllMaps); + + LOG.info("Going to preempt " + toPreempt + + " due to lack of space for maps"); assignedRequests.preemptReduce(toPreempt); } } @@ -497,7 +525,7 @@ public void scheduleReduces( int totalMaps, int completedMaps, int scheduledMaps, int scheduledReduces, int assignedMaps, int assignedReduces, - int mapResourceReqt, int reduceResourceReqt, + Resource mapResourceReqt, Resource reduceResourceReqt, int numPendingReduces, float maxReduceRampupLimit, float reduceSlowStart) { @@ -505,8 +533,12 @@ public void scheduleReduces( return; } - int headRoom = getAvailableResources() != null ? 
- getAvailableResources().getMemory() : 0; + // get available resources for this job + Resource headRoom = getAvailableResources(); + if (headRoom == null) { + headRoom = Resources.none(); + } + LOG.info("Recalculating schedule, headroom=" + headRoom); //check for slow start @@ -540,49 +572,60 @@ public void scheduleReduces( completedMapPercent = 1; } - int netScheduledMapMem = - (scheduledMaps + assignedMaps) * mapResourceReqt; + Resource netScheduledMapResource = + Resources.multiply(mapResourceReqt, (scheduledMaps + assignedMaps)); - int netScheduledReduceMem = - (scheduledReduces + assignedReduces) * reduceResourceReqt; + Resource netScheduledReduceResource = + Resources.multiply(reduceResourceReqt, + (scheduledReduces + assignedReduces)); + + Resource finalMapResourceLimit; + Resource finalReduceResourceLimit; - int finalMapMemLimit = 0; - int finalReduceMemLimit = 0; - // ramp up the reduces based on completed map percentage - int totalMemLimit = getMemLimit(); - int idealReduceMemLimit = - Math.min( - (int)(completedMapPercent * totalMemLimit), - (int) (maxReduceRampupLimit * totalMemLimit)); - int idealMapMemLimit = totalMemLimit - idealReduceMemLimit; + Resource totalResourceLimit = getResourceLimit(); + + Resource idealReduceResourceLimit = + Resources.multiply(totalResourceLimit, + Math.min(completedMapPercent, maxReduceRampupLimit)); + Resource ideaMapResourceLimit = + Resources.subtract(totalResourceLimit, idealReduceResourceLimit); // check if there aren't enough maps scheduled, give the free map capacity - // to reduce - if (idealMapMemLimit > netScheduledMapMem) { - int unusedMapMemLimit = idealMapMemLimit - netScheduledMapMem; - finalReduceMemLimit = idealReduceMemLimit + unusedMapMemLimit; - finalMapMemLimit = totalMemLimit - finalReduceMemLimit; + // to reduce. 
+ // Even when container number equals, there may be unused resources in one + // dimension + if (ResourceCalculatorUtils.computeAvailableContainers(ideaMapResourceLimit, + mapResourceReqt, getSchedulerResourceTypes()) >= (scheduledMaps + assignedMaps)) { + // enough resource given to maps, given the remaining to reduces + Resource unusedMapResourceLimit = + Resources.subtract(ideaMapResourceLimit, netScheduledMapResource); + finalReduceResourceLimit = + Resources.add(idealReduceResourceLimit, unusedMapResourceLimit); + finalMapResourceLimit = + Resources.subtract(totalResourceLimit, finalReduceResourceLimit); } else { - finalMapMemLimit = idealMapMemLimit; - finalReduceMemLimit = idealReduceMemLimit; + finalMapResourceLimit = ideaMapResourceLimit; + finalReduceResourceLimit = idealReduceResourceLimit; } - - LOG.info("completedMapPercent " + completedMapPercent + - " totalMemLimit:" + totalMemLimit + - " finalMapMemLimit:" + finalMapMemLimit + - " finalReduceMemLimit:" + finalReduceMemLimit + - " netScheduledMapMem:" + netScheduledMapMem + - " netScheduledReduceMem:" + netScheduledReduceMem); - - int rampUp = - (finalReduceMemLimit - netScheduledReduceMem) / reduceResourceReqt; - + + LOG.info("completedMapPercent " + completedMapPercent + + " totalResourceLimit:" + totalResourceLimit + + " finalMapResourceLimit:" + finalMapResourceLimit + + " finalReduceResourceLimit:" + finalReduceResourceLimit + + " netScheduledMapResource:" + netScheduledMapResource + + " netScheduledReduceResource:" + netScheduledReduceResource); + + int rampUp = + ResourceCalculatorUtils.computeAvailableContainers(Resources.subtract( + finalReduceResourceLimit, netScheduledReduceResource), + reduceResourceReqt, getSchedulerResourceTypes()); + if (rampUp > 0) { rampUp = Math.min(rampUp, numPendingReduces); LOG.info("Ramping up " + rampUp); rampUpReduces(rampUp); - } else if (rampUp < 0){ + } else if (rampUp < 0) { int rampDown = -1 * rampUp; rampDown = Math.min(rampDown, scheduledReduces); 
LOG.info("Ramping down " + rampDown); @@ -618,8 +661,10 @@ public void rampDownReduces(int rampDown) { @SuppressWarnings("unchecked") private List getResources() throws Exception { - int headRoom = getAvailableResources() != null - ? getAvailableResources().getMemory() : 0;//first time it would be null + // will be null the first time + Resource headRoom = + getAvailableResources() == null ? Resources.none() : + Resources.clone(getAvailableResources()); AllocateResponse response; /* * If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS @@ -670,7 +715,9 @@ private List getResources() throws Exception { throw new YarnRuntimeException(msg); } } - int newHeadRoom = getAvailableResources() != null ? getAvailableResources().getMemory() : 0; + Resource newHeadRoom = + getAvailableResources() == null ? Resources.none() + : getAvailableResources(); List newContainers = response.getAllocatedContainers(); // Setting NMTokens if (response.getNMTokens() != null) { @@ -694,10 +741,11 @@ private List getResources() throws Exception { new PreemptionContext(assignedRequests), preemptReq); } - if (newContainers.size() + finishedContainers.size() > 0 || headRoom != newHeadRoom) { + if (newContainers.size() + finishedContainers.size() > 0 + || !headRoom.equals(newHeadRoom)) { //something changed recalculateReduceSchedule = true; - if (LOG.isDebugEnabled() && headRoom != newHeadRoom) { + if (LOG.isDebugEnabled() && !headRoom.equals(newHeadRoom)) { LOG.debug("headroom=" + newHeadRoom); } } @@ -802,10 +850,18 @@ private void handleUpdatedNodes(AllocateResponse response) { } @Private - public int getMemLimit() { - int headRoom = getAvailableResources() != null ? 
getAvailableResources().getMemory() : 0; - return headRoom + assignedRequests.maps.size() * mapResourceRequest + - assignedRequests.reduces.size() * reduceResourceRequest; + public Resource getResourceLimit() { + Resource headRoom = getAvailableResources(); + if (headRoom == null) { + headRoom = Resources.none(); + } + Resource assignedMapResource = + Resources.multiply(mapResourceRequest, assignedRequests.maps.size()); + Resource assignedReduceResource = + Resources.multiply(reduceResourceRequest, + assignedRequests.reduces.size()); + return Resources.add(headRoom, + Resources.add(assignedMapResource, assignedReduceResource)); } @Private @@ -914,10 +970,11 @@ private void assign(List allocatedContainers) { // a container to be assigned boolean isAssignable = true; Priority priority = allocated.getPriority(); - int allocatedMemory = allocated.getResource().getMemory(); + Resource allocatedResource = allocated.getResource(); if (PRIORITY_FAST_FAIL_MAP.equals(priority) || PRIORITY_MAP.equals(priority)) { - if (allocatedMemory < mapResourceRequest + if (ResourceCalculatorUtils.computeAvailableContainers(allocatedResource, + mapResourceRequest, getSchedulerResourceTypes()) <= 0 || maps.isEmpty()) { LOG.info("Cannot assign container " + allocated + " for a map as either " @@ -928,7 +985,8 @@ private void assign(List allocatedContainers) { } } else if (PRIORITY_REDUCE.equals(priority)) { - if (allocatedMemory < reduceResourceRequest + if (ResourceCalculatorUtils.computeAvailableContainers(allocatedResource, + reduceResourceRequest, getSchedulerResourceTypes()) <= 0 || reduces.isEmpty()) { LOG.info("Cannot assign container " + allocated + " for a reduce as either " diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java new file mode 100644 index 0000000000000..b9bc8b595eced --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/ResourceCalculatorUtils.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.app.rm; + +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.util.Records; + +import java.util.EnumSet; + +public class ResourceCalculatorUtils { + public static int divideAndCeil(int a, int b) { + if (b == 0) { + return 0; + } + return (a + (b - 1)) / b; + } + + public static int computeAvailableContainers(Resource available, + Resource required, EnumSet resourceTypes) { + if (resourceTypes.contains(SchedulerResourceTypes.CPU)) { + return Math.min(available.getMemory() / required.getMemory(), + available.getVirtualCores() / required.getVirtualCores()); + } + return available.getMemory() / required.getMemory(); + } + + public static int divideAndCeilContainers(Resource required, Resource factor, + EnumSet resourceTypes) { + if (resourceTypes.contains(SchedulerResourceTypes.CPU)) { + return Math.max(divideAndCeil(required.getMemory(), factor.getMemory()), + divideAndCeil(required.getVirtualCores(), factor.getVirtualCores())); + } + return divideAndCeil(required.getMemory(), factor.getMemory()); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java index 9664b8f423eba..341e67354a1a1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.v2.app.rm; +import static org.mockito.Matchers.any; import static 
org.mockito.Matchers.anyFloat; import static org.mockito.Matchers.anyInt; import static org.mockito.Matchers.isA; @@ -30,19 +31,14 @@ import static org.mockito.Mockito.when; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.ClusterInfo; import org.apache.hadoop.mapreduce.v2.app.MRApp; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.junit.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -458,8 +454,8 @@ public void testPreemptReducers() throws Exception { 0, 0, 0, 0, 0, 0, "jobfile", null, false, "")); MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, appAttemptId, mockJob, new SystemClock()); - allocator.setMapResourceRequest(1024); - allocator.setReduceResourceRequest(1024); + allocator.setMapResourceRequest(BuilderUtils.newResource(1024, 1)); + allocator.setReduceResourceRequest(BuilderUtils.newResource(1024, 1)); RMContainerAllocator.AssignedRequests assignedRequests = allocator.getAssignedRequests(); RMContainerAllocator.ScheduledRequests scheduledRequests = @@ -478,7 +474,7 @@ public void testPreemptReducers() throws Exception { @Test(timeout = 30000) public void testNonAggressivelyPreemptReducers() throws Exception { - LOG.info("Running testPreemptReducers"); + LOG.info("Running testNonAggressivelyPreemptReducers"); final int preemptThreshold = 2; //sec Configuration conf = new Configuration(); @@ -513,8 +509,8 @@ public void testNonAggressivelyPreemptReducers() throws Exception { clock.setTime(1); MyContainerAllocator allocator = new 
MyContainerAllocator(rm, conf, appAttemptId, mockJob, clock); - allocator.setMapResourceRequest(1024); - allocator.setReduceResourceRequest(1024); + allocator.setMapResourceRequest(BuilderUtils.newResource(1024, 1)); + allocator.setReduceResourceRequest(BuilderUtils.newResource(1024, 1)); RMContainerAllocator.AssignedRequests assignedRequests = allocator.getAssignedRequests(); RMContainerAllocator.ScheduledRequests scheduledRequests = @@ -1774,17 +1770,19 @@ public void testReduceScheduling() throws Exception { int scheduledReduces = 0; int assignedMaps = 2; int assignedReduces = 0; - int mapResourceReqt = 1024; - int reduceResourceReqt = 2*1024; + Resource mapResourceReqt = BuilderUtils.newResource(1024, 1); + Resource reduceResourceReqt = BuilderUtils.newResource(2 * 1024, 1); int numPendingReduces = 4; float maxReduceRampupLimit = 0.5f; float reduceSlowStart = 0.2f; RMContainerAllocator allocator = mock(RMContainerAllocator.class); - doCallRealMethod().when(allocator). - scheduleReduces(anyInt(), anyInt(), anyInt(), anyInt(), anyInt(), - anyInt(), anyInt(), anyInt(), anyInt(), anyFloat(), anyFloat()); - + doCallRealMethod().when(allocator).scheduleReduces(anyInt(), anyInt(), + anyInt(), anyInt(), anyInt(), anyInt(), any(Resource.class), + any(Resource.class), anyInt(), anyFloat(), anyFloat()); + doReturn(EnumSet.of(SchedulerResourceTypes.MEMORY)).when(allocator) + .getSchedulerResourceTypes(); + // Test slow-start allocator.scheduleReduces( totalMaps, succeededMaps, @@ -1808,6 +1806,7 @@ public void testReduceScheduling() throws Exception { verify(allocator, never()).scheduleAllReduces(); succeededMaps = 3; + doReturn(BuilderUtils.newResource(0, 0)).when(allocator).getResourceLimit(); allocator.scheduleReduces( totalMaps, succeededMaps, scheduledMaps, scheduledReduces, @@ -1818,7 +1817,8 @@ public void testReduceScheduling() throws Exception { verify(allocator, times(1)).setIsReduceStarted(true); // Test reduce ramp-up - doReturn(100 * 
1024).when(allocator).getMemLimit(); + doReturn(BuilderUtils.newResource(100 * 1024, 100 * 1)).when(allocator) + .getResourceLimit(); allocator.scheduleReduces( totalMaps, succeededMaps, scheduledMaps, scheduledReduces, @@ -1831,13 +1831,14 @@ public void testReduceScheduling() throws Exception { // Test reduce ramp-down scheduledReduces = 3; - doReturn(10 * 1024).when(allocator).getMemLimit(); + doReturn(BuilderUtils.newResource(10 * 1024, 10 * 1)).when(allocator) + .getResourceLimit(); allocator.scheduleReduces( - totalMaps, succeededMaps, - scheduledMaps, scheduledReduces, - assignedMaps, assignedReduces, - mapResourceReqt, reduceResourceReqt, - numPendingReduces, + totalMaps, succeededMaps, + scheduledMaps, scheduledReduces, + assignedMaps, assignedReduces, + mapResourceReqt, reduceResourceReqt, + numPendingReduces, maxReduceRampupLimit, reduceSlowStart); verify(allocator).rampDownReduces(anyInt()); @@ -1846,7 +1847,8 @@ public void testReduceScheduling() throws Exception { // should be invoked twice. 
scheduledMaps = 2; assignedReduces = 2; - doReturn(10 * 1024).when(allocator).getMemLimit(); + doReturn(BuilderUtils.newResource(10 * 1024, 10 * 1)).when(allocator) + .getResourceLimit(); allocator.scheduleReduces( totalMaps, succeededMaps, scheduledMaps, scheduledReduces, @@ -1855,6 +1857,30 @@ public void testReduceScheduling() throws Exception { numPendingReduces, maxReduceRampupLimit, reduceSlowStart); verify(allocator, times(2)).rampDownReduces(anyInt()); + + doReturn( + EnumSet.of(SchedulerResourceTypes.MEMORY, SchedulerResourceTypes.CPU)) + .when(allocator).getSchedulerResourceTypes(); + + // Test ramp-down when enough memory but not enough cpu resource + scheduledMaps = 10; + assignedReduces = 0; + doReturn(BuilderUtils.newResource(100 * 1024, 5 * 1)).when(allocator) + .getResourceLimit(); + allocator.scheduleReduces(totalMaps, succeededMaps, scheduledMaps, + scheduledReduces, assignedMaps, assignedReduces, mapResourceReqt, + reduceResourceReqt, numPendingReduces, maxReduceRampupLimit, + reduceSlowStart); + verify(allocator, times(3)).rampDownReduces(anyInt()); + + // Test ramp-down when enough cpu but not enough memory resource + doReturn(BuilderUtils.newResource(10 * 1024, 100 * 1)).when(allocator) + .getResourceLimit(); + allocator.scheduleReduces(totalMaps, succeededMaps, scheduledMaps, + scheduledReduces, assignedMaps, assignedReduces, mapResourceReqt, + reduceResourceReqt, numPendingReduces, maxReduceRampupLimit, + reduceSlowStart); + verify(allocator, times(4)).rampDownReduces(anyInt()); } private static class RecalculateContainerAllocator extends MyContainerAllocator { @@ -1868,7 +1894,7 @@ public RecalculateContainerAllocator(MyResourceManager rm, @Override public void scheduleReduces(int totalMaps, int completedMaps, int scheduledMaps, int scheduledReduces, int assignedMaps, - int assignedReduces, int mapResourceReqt, int reduceResourceReqt, + int assignedReduces, Resource mapResourceReqt, Resource reduceResourceReqt, int numPendingReduces, float 
maxReduceRampupLimit, float reduceSlowStart) { recalculatedReduceSchedule = true; } @@ -2095,7 +2121,7 @@ public void testRMContainerAllocatorResendsRequestsOnRMRestart() conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true); - + conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0); conf.setBoolean(MRJobConfig.MR_AM_JOB_NODE_BLACKLISTING_ENABLE, true); conf.setInt(MRJobConfig.MAX_TASK_FAILURES_PER_TRACKER, 1); conf.setInt( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 3bd8414099c9e..113b44539f03f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -26,12 +26,11 @@ import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; @@ -58,8 +57,6 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.util.ApplicationClassLoader; -import org.apache.hadoop.util.Shell; -import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.ContainerLogAppender; import 
org.apache.hadoop.yarn.api.ApplicationConstants; @@ -469,6 +466,62 @@ public static void setupDistributedCache( DistributedCache.getFileVisibilities(conf)); } + /** + * Set up the DistributedCache related configs to make + * {@link DistributedCache#getLocalCacheFiles(Configuration)} + * and + * {@link DistributedCache#getLocalCacheArchives(Configuration)} + * working. + * @param conf + * @throws java.io.IOException + */ + public static void setupDistributedCacheLocal(Configuration conf) + throws IOException { + + String localWorkDir = System.getenv("PWD"); + // ^ ^ all symlinks are created in the current work-dir + + // Update the configuration object with localized archives. + URI[] cacheArchives = DistributedCache.getCacheArchives(conf); + if (cacheArchives != null) { + List localArchives = new ArrayList(); + for (int i = 0; i < cacheArchives.length; ++i) { + URI u = cacheArchives[i]; + Path p = new Path(u); + Path name = + new Path((null == u.getFragment()) ? p.getName() + : u.getFragment()); + String linkName = name.toUri().getPath(); + localArchives.add(new Path(localWorkDir, linkName).toUri().getPath()); + } + if (!localArchives.isEmpty()) { + conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils + .arrayToString(localArchives.toArray(new String[localArchives + .size()]))); + } + } + + // Update the configuration object with localized files. + URI[] cacheFiles = DistributedCache.getCacheFiles(conf); + if (cacheFiles != null) { + List localFiles = new ArrayList(); + for (int i = 0; i < cacheFiles.length; ++i) { + URI u = cacheFiles[i]; + Path p = new Path(u); + Path name = + new Path((null == u.getFragment()) ? 
p.getName() + : u.getFragment()); + String linkName = name.toUri().getPath(); + localFiles.add(new Path(localWorkDir, linkName).toUri().getPath()); + } + if (!localFiles.isEmpty()) { + conf.set(MRJobConfig.CACHE_LOCALFILES, + StringUtils.arrayToString(localFiles + .toArray(new String[localFiles.size()]))); + } + } + } + private static String getResourceDescription(LocalResourceType type) { if(type == LocalResourceType.ARCHIVE || type == LocalResourceType.PATTERN) { return "cache archive (" + MRJobConfig.CACHE_ARCHIVES + ") "; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java index 0734e7f29530b..6cd569a65ce34 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java @@ -578,7 +578,9 @@ private void readTokensFromFiles(Configuration conf, Credentials credentials) conf.get("mapreduce.job.credentials.binary"); if (binaryTokenFilename != null) { Credentials binary = Credentials.readTokenStorageFile( - new Path("file:///" + binaryTokenFilename), conf); + FileSystem.getLocal(conf).makeQualified( + new Path(binaryTokenFilename)), + conf); credentials.addAll(binary); } // add secret keys coming from a json file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 4c48cf5123519..e39dd6a30cdd8 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -298,6 +298,14 @@ public interface MRJobConfig { public static final String MAX_FETCH_FAILURES_NOTIFICATIONS = "mapreduce.reduce.shuffle.max-fetch-failures-notifications"; public static final int DEFAULT_MAX_FETCH_FAILURES_NOTIFICATIONS = 3; + + public static final String SHUFFLE_FETCH_RETRY_INTERVAL_MS = "mapreduce.reduce.shuffle.fetch.retry.interval-ms"; + /** Default interval that fetcher retry to fetch during NM restart.*/ + public final static int DEFAULT_SHUFFLE_FETCH_RETRY_INTERVAL_MS = 1000; + + public static final String SHUFFLE_FETCH_RETRY_TIMEOUT_MS = "mapreduce.reduce.shuffle.fetch.retry.timeout-ms"; + + public static final String SHUFFLE_FETCH_RETRY_ENABLED = "mapreduce.reduce.shuffle.fetch.retry.enabled"; public static final String SHUFFLE_NOTIFY_READERROR = "mapreduce.reduce.shuffle.notify.readerror"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/TokenCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/TokenCache.java index cadd04b56341e..7b1f657ec40b1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/TokenCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/security/TokenCache.java @@ -134,7 +134,9 @@ private static void mergeBinaryTokens(Credentials creds, Configuration conf) { Credentials binary; try { binary = Credentials.readTokenStorageFile( - new Path("file:///" + binaryTokenFilename), conf); + 
FileSystem.getLocal(conf).makeQualified( + new Path(binaryTokenFilename)), + conf); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java index e1e16635a1bb4..a41620058cf9e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java @@ -27,6 +27,7 @@ import java.net.URLConnection; import java.security.GeneralSecurityException; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -46,6 +47,8 @@ import org.apache.hadoop.mapreduce.security.SecureShuffleUtils; import org.apache.hadoop.mapreduce.CryptoUtils; import org.apache.hadoop.security.ssl.SSLFactory; +import org.apache.hadoop.util.Time; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import com.google.common.annotations.VisibleForTesting; @@ -85,10 +88,18 @@ private static enum ShuffleErrors{IO_ERROR, WRONG_LENGTH, BAD_ID, WRONG_MAP, private final int connectionTimeout; private final int readTimeout; + private final int fetchRetryTimeout; + private final int fetchRetryInterval; + + private final boolean fetchRetryEnabled; + private final SecretKey shuffleSecretKey; protected HttpURLConnection connection; private volatile boolean stopped = false; + + // Initiative value is 0, which means it hasn't retried yet. 
+ private long retryStartTime = 0; private static boolean sslShuffle; private static SSLFactory sslFactory; @@ -135,6 +146,19 @@ public Fetcher(JobConf job, TaskAttemptID reduceId, this.readTimeout = job.getInt(MRJobConfig.SHUFFLE_READ_TIMEOUT, DEFAULT_READ_TIMEOUT); + this.fetchRetryInterval = job.getInt(MRJobConfig.SHUFFLE_FETCH_RETRY_INTERVAL_MS, + MRJobConfig.DEFAULT_SHUFFLE_FETCH_RETRY_INTERVAL_MS); + + this.fetchRetryTimeout = job.getInt(MRJobConfig.SHUFFLE_FETCH_RETRY_TIMEOUT_MS, + DEFAULT_STALLED_COPY_TIMEOUT); + + boolean shuffleFetchEnabledDefault = job.getBoolean( + YarnConfiguration.NM_RECOVERY_ENABLED, + YarnConfiguration.DEFAULT_NM_RECOVERY_ENABLED); + this.fetchRetryEnabled = job.getBoolean( + MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, + shuffleFetchEnabledDefault); + setName("fetcher#" + id); setDaemon(true); @@ -242,6 +266,8 @@ private void abortConnect(MapHost host, Set remaining) { */ @VisibleForTesting protected void copyFromHost(MapHost host) throws IOException { + // reset retryStartTime for a new host + retryStartTime = 0; // Get completed maps on 'host' List maps = scheduler.getMapsForHost(host); @@ -261,60 +287,14 @@ protected void copyFromHost(MapHost host) throws IOException { // Construct the url and connect DataInputStream input = null; + URL url = getMapOutputURL(host, maps); try { - URL url = getMapOutputURL(host, maps); - openConnection(url); - if (stopped) { - abortConnect(host, remaining); - return; - } + setupConnectionsWithRetry(host, remaining, url); - // generate hash of the url - String msgToEncode = SecureShuffleUtils.buildMsgFrom(url); - String encHash = SecureShuffleUtils.hashFromString(msgToEncode, - shuffleSecretKey); - - // put url hash into http header - connection.addRequestProperty( - SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash); - // set the read timeout - connection.setReadTimeout(readTimeout); - // put shuffle version into http header - connection.addRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, - 
ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); - connection.addRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, - ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); - connect(connection, connectionTimeout); - // verify that the thread wasn't stopped during calls to connect if (stopped) { abortConnect(host, remaining); return; } - input = new DataInputStream(connection.getInputStream()); - - // Validate response code - int rc = connection.getResponseCode(); - if (rc != HttpURLConnection.HTTP_OK) { - throw new IOException( - "Got invalid response code " + rc + " from " + url + - ": " + connection.getResponseMessage()); - } - // get the shuffle version - if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals( - connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)) - || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals( - connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))) { - throw new IOException("Incompatible shuffle response version"); - } - // get the replyHash which is HMac of the encHash we sent to the server - String replyHash = connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH); - if(replyHash==null) { - throw new IOException("security validation of TT Map output failed"); - } - LOG.debug("url="+msgToEncode+";encHash="+encHash+";replyHash="+replyHash); - // verify that replyHash is HMac of encHash - SecureShuffleUtils.verifyReply(replyHash, encHash, shuffleSecretKey); - LOG.info("for url="+msgToEncode+" sent hash and received reply"); } catch (IOException ie) { boolean connectExcpt = ie instanceof ConnectException; ioErrs.increment(1); @@ -336,6 +316,8 @@ protected void copyFromHost(MapHost host) throws IOException { return; } + input = new DataInputStream(connection.getInputStream()); + try { // Loop through available map-outputs and fetch them // On any error, faildTasks is not null and we exit @@ -343,7 +325,23 @@ protected void copyFromHost(MapHost host) throws IOException { // yet_to_be_fetched list and marking the failed tasks. 
TaskAttemptID[] failedTasks = null; while (!remaining.isEmpty() && failedTasks == null) { - failedTasks = copyMapOutput(host, input, remaining); + try { + failedTasks = copyMapOutput(host, input, remaining, fetchRetryEnabled); + } catch (IOException e) { + // + // Setup connection again if disconnected by NM + connection.disconnect(); + // Get map output from remaining tasks only. + url = getMapOutputURL(host, remaining); + + // Connect with retry as expecting host's recovery take sometime. + setupConnectionsWithRetry(host, remaining, url); + if (stopped) { + abortConnect(host, remaining); + return; + } + input = new DataInputStream(connection.getInputStream()); + } } if(failedTasks != null && failedTasks.length > 0) { @@ -371,19 +369,111 @@ protected void copyFromHost(MapHost host) throws IOException { } } } + + private void setupConnectionsWithRetry(MapHost host, + Set remaining, URL url) throws IOException { + openConnectionWithRetry(host, remaining, url); + if (stopped) { + return; + } + + // generate hash of the url + String msgToEncode = SecureShuffleUtils.buildMsgFrom(url); + String encHash = SecureShuffleUtils.hashFromString(msgToEncode, + shuffleSecretKey); + + setupShuffleConnection(encHash); + connect(connection, connectionTimeout); + // verify that the thread wasn't stopped during calls to connect + if (stopped) { + return; + } + + verifyConnection(url, msgToEncode, encHash); + } + + private void openConnectionWithRetry(MapHost host, + Set remaining, URL url) throws IOException { + long startTime = Time.monotonicNow(); + boolean shouldWait = true; + while (shouldWait) { + try { + openConnection(url); + shouldWait = false; + } catch (IOException e) { + if (!fetchRetryEnabled) { + // throw exception directly if fetch's retry is not enabled + throw e; + } + if ((Time.monotonicNow() - startTime) >= this.fetchRetryTimeout) { + LOG.warn("Failed to connect to host: " + url + "after " + + fetchRetryTimeout + "milliseconds."); + throw e; + } + try { + 
Thread.sleep(this.fetchRetryInterval); + } catch (InterruptedException e1) { + if (stopped) { + return; + } + } + } + } + } + + private void verifyConnection(URL url, String msgToEncode, String encHash) + throws IOException { + // Validate response code + int rc = connection.getResponseCode(); + if (rc != HttpURLConnection.HTTP_OK) { + throw new IOException( + "Got invalid response code " + rc + " from " + url + + ": " + connection.getResponseMessage()); + } + // get the shuffle version + if (!ShuffleHeader.DEFAULT_HTTP_HEADER_NAME.equals( + connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)) + || !ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION.equals( + connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))) { + throw new IOException("Incompatible shuffle response version"); + } + // get the replyHash which is HMac of the encHash we sent to the server + String replyHash = connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH); + if(replyHash==null) { + throw new IOException("security validation of TT Map output failed"); + } + LOG.debug("url="+msgToEncode+";encHash="+encHash+";replyHash="+replyHash); + // verify that replyHash is HMac of encHash + SecureShuffleUtils.verifyReply(replyHash, encHash, shuffleSecretKey); + LOG.info("for url="+msgToEncode+" sent hash and received reply"); + } + + private void setupShuffleConnection(String encHash) { + // put url hash into http header + connection.addRequestProperty( + SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash); + // set the read timeout + connection.setReadTimeout(readTimeout); + // put shuffle version into http header + connection.addRequestProperty(ShuffleHeader.HTTP_HEADER_NAME, + ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + connection.addRequestProperty(ShuffleHeader.HTTP_HEADER_VERSION, + ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + } private static TaskAttemptID[] EMPTY_ATTEMPT_ID_ARRAY = new TaskAttemptID[0]; private TaskAttemptID[] copyMapOutput(MapHost host, DataInputStream 
input, - Set remaining) { + Set remaining, + boolean canRetry) throws IOException { MapOutput mapOutput = null; TaskAttemptID mapId = null; long decompressedLength = -1; long compressedLength = -1; try { - long startTime = System.currentTimeMillis(); + long startTime = Time.monotonicNow(); int forReduce = -1; //Read the shuffle header try { @@ -449,7 +539,10 @@ private TaskAttemptID[] copyMapOutput(MapHost host, } // Inform the shuffle scheduler - long endTime = System.currentTimeMillis(); + long endTime = Time.monotonicNow(); + // Reset retryStartTime as map task make progress if retried before. + retryStartTime = 0; + scheduler.copySucceeded(mapId, host, compressedLength, endTime - startTime, mapOutput); // Note successful shuffle @@ -457,9 +550,14 @@ private TaskAttemptID[] copyMapOutput(MapHost host, metrics.successFetch(); return null; } catch (IOException ioe) { + + if (canRetry) { + checkTimeoutOrRetry(host, ioe); + } + ioErrs.increment(1); if (mapId == null || mapOutput == null) { - LOG.info("fetcher#" + id + " failed to read map header" + + LOG.warn("fetcher#" + id + " failed to read map header" + mapId + " decomp: " + decompressedLength + ", " + compressedLength, ioe); if(mapId == null) { @@ -468,7 +566,7 @@ private TaskAttemptID[] copyMapOutput(MapHost host, return new TaskAttemptID[] {mapId}; } } - + LOG.warn("Failed to shuffle output of " + mapId + " from " + host.getHostName(), ioe); @@ -479,6 +577,29 @@ private TaskAttemptID[] copyMapOutput(MapHost host, } } + + /** check if hit timeout of retry, if not, throw an exception and start a + * new round of retry.*/ + private void checkTimeoutOrRetry(MapHost host, IOException ioe) + throws IOException { + // First time to retry. + long currentTime = Time.monotonicNow(); + if (retryStartTime == 0) { + retryStartTime = currentTime; + } + + // Retry is not timeout, let's do retry with throwing an exception. 
+ if (currentTime - retryStartTime < this.fetchRetryTimeout) { + LOG.warn("Shuffle output from " + host.getHostName() + + " failed, retry it."); + throw ioe; + } else { + // timeout, prepare to be failed. + LOG.warn("Timeout for copying MapOutput with retry on host " + host + + "after " + fetchRetryTimeout + "milliseconds."); + + } + } /** * Do some basic verification on the input received -- Being defensive @@ -525,7 +646,7 @@ private boolean verifySanity(long compressedLength, long decompressedLength, * @return * @throws MalformedURLException */ - private URL getMapOutputURL(MapHost host, List maps + private URL getMapOutputURL(MapHost host, Collection maps ) throws MalformedURLException { // Get the base url StringBuffer url = new StringBuffer(host.getBaseUrl()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java index 63f326632efc1..e48a73a0c1252 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.task.reduce.MapHost.State; import org.apache.hadoop.util.Progress; +import org.apache.hadoop.util.Time; @InterfaceAudience.Private @InterfaceStability.Unstable @@ -121,7 +122,7 @@ public ShuffleSchedulerImpl(JobConf job, TaskStatus status, this.shuffledMapsCounter = shuffledMapsCounter; this.reduceShuffleBytes = reduceShuffleBytes; this.failedShuffleCounter = failedShuffleCounter; - this.startTime = 
System.currentTimeMillis(); + this.startTime = Time.monotonicNow(); lastProgressTime = startTime; referee.start(); this.maxFailedUniqueFetches = Math.min(totalMaps, 5); @@ -198,7 +199,7 @@ public synchronized void copySucceeded(TaskAttemptID mapId, totalBytesShuffledTillNow += bytes; updateStatus(); reduceShuffleBytes.increment(bytes); - lastProgressTime = System.currentTimeMillis(); + lastProgressTime = Time.monotonicNow(); LOG.debug("map " + mapId + " done " + status.getStateString()); } } @@ -206,7 +207,7 @@ public synchronized void copySucceeded(TaskAttemptID mapId, private void updateStatus() { float mbs = (float) totalBytesShuffledTillNow / (1024 * 1024); int mapsDone = totalMaps - remainingMaps; - long secsSinceStart = (System.currentTimeMillis() - startTime) / 1000 + 1; + long secsSinceStart = (Time.monotonicNow() - startTime) / 1000 + 1; float transferRate = mbs / secsSinceStart; progress.set((float) mapsDone / totalMaps); @@ -307,7 +308,7 @@ private void checkReducerHealth() { // check if the reducer is stalled for a long time // duration for which the reducer is stalled int stallDuration = - (int)(System.currentTimeMillis() - lastProgressTime); + (int)(Time.monotonicNow() - lastProgressTime); // duration for which the reducer ran with progress int shuffleProgressDuration = @@ -389,7 +390,7 @@ public synchronized MapHost getHost() throws InterruptedException { LOG.info("Assigning " + host + " with " + host.getNumKnownMapOutputs() + " to " + Thread.currentThread().getName()); - shuffleStart.set(System.currentTimeMillis()); + shuffleStart.set(Time.monotonicNow()); return host; } @@ -430,7 +431,7 @@ public synchronized void freeHost(MapHost host) { } } LOG.info(host + " freed by " + Thread.currentThread().getName() + " in " + - (System.currentTimeMillis()-shuffleStart.get()) + "ms"); + (Time.monotonicNow()-shuffleStart.get()) + "ms"); } public synchronized void resetKnownMaps() { @@ -464,12 +465,12 @@ private static class Penalty implements Delayed { 
Penalty(MapHost host, long delay) { this.host = host; - this.endTime = System.currentTimeMillis() + delay; + this.endTime = Time.monotonicNow() + delay; } @Override public long getDelay(TimeUnit unit) { - long remainingTime = endTime - System.currentTimeMillis(); + long remainingTime = endTime - Time.monotonicNow(); return unit.convert(remainingTime, TimeUnit.MILLISECONDS); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 6cefdc97b52d4..d1052c5b249ab 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -128,6 +128,27 @@ + + mapreduce.reduce.shuffle.fetch.retry.enabled + ${yarn.nodemanager.recovery.enabled} + Set to enable fetch retry during host restart. + + + + mapreduce.reduce.shuffle.fetch.retry.interval-ms + 1000 + Time of interval that fetcher retry to fetch again when some + non-fatal failure happens because of some events like NM restart. + + + + + mapreduce.reduce.shuffle.fetch.retry.timeout-ms + 30000 + Timeout value for fetcher to retry to fetch again when some + non-fatal failure happens because of some events like NM restart. 
+ + mapreduce.reduce.shuffle.retry-delay.max.ms 60000 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm index db0a25f7e4fa2..be557a7329332 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm @@ -59,6 +59,11 @@ How to Create an Archive `hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir` + If you specify source files that are in an encryption zone, they will be + decrypted and written into the archive. If the har file is not located in an + encryption zone, then they will be stored in clear (decrypted) form. If the + har file is located in an encryption zone they will stored in encrypted form. 
+ How to Look Up Files in Archives -------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/security/TestTokenCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/security/TestTokenCache.java index de594d405e32c..127f8ae35b301 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/security/TestTokenCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/security/TestTokenCache.java @@ -63,12 +63,25 @@ public void testObtainTokens() throws Exception { @Test @SuppressWarnings("deprecation") - public void testBinaryCredentials() throws Exception { + public void testBinaryCredentialsWithoutScheme() throws Exception { + testBinaryCredentials(false); + } + + @Test + @SuppressWarnings("deprecation") + public void testBinaryCredentialsWithScheme() throws Exception { + testBinaryCredentials(true); + } + + private void testBinaryCredentials(boolean hasScheme) throws Exception { Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","test/build/data")); // ick, but need fq path minus file:/ - String binaryTokenFile = FileSystem.getLocal(conf).makeQualified( - new Path(TEST_ROOT_DIR, "tokenFile")).toUri().getPath(); + String binaryTokenFile = hasScheme + ? 
FileSystem.getLocal(conf).makeQualified( + new Path(TEST_ROOT_DIR, "tokenFile")).toString() + : FileSystem.getLocal(conf).makeQualified( + new Path(TEST_ROOT_DIR, "tokenFile")).toUri().getPath(); MockFileSystem fs1 = createFileSystemForServiceName("service1"); MockFileSystem fs2 = createFileSystemForServiceName("service2"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java index 3db382e4f4466..7736c4854ff1a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.MapOutputFile; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskID; import org.junit.After; @@ -60,6 +61,7 @@ import org.apache.hadoop.mapreduce.security.SecureShuffleUtils; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.util.DiskChecker.DiskErrorException; +import org.apache.hadoop.util.Time; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; @@ -71,6 +73,7 @@ public class TestFetcher { private static final Log LOG = LogFactory.getLog(TestFetcher.class); JobConf job = null; + JobConf jobWithRetry = null; TaskAttemptID id = null; ShuffleSchedulerImpl ss = null; MergeManagerImpl mm = null; @@ -93,6 +96,9 @@ public class TestFetcher { public void setup() { LOG.info(">>>> " + name.getMethodName()); job = new JobConf(); + 
job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false); + jobWithRetry = new JobConf(); + jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true); id = TaskAttemptID.forName("attempt_0_1_r_1_1"); ss = mock(ShuffleSchedulerImpl.class); mm = mock(MergeManagerImpl.class); @@ -228,6 +234,38 @@ public void testCopyFromHostIncompatibleShuffleVersion() throws Exception { verify(ss, times(3)).putBackKnownMapOutput(any(MapHost.class), eq(map1ID)); verify(ss, times(3)).putBackKnownMapOutput(any(MapHost.class), eq(map2ID)); } + + @Test + public void testCopyFromHostIncompatibleShuffleVersionWithRetry() + throws Exception { + String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key); + + when(connection.getResponseCode()).thenReturn(200); + when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)) + .thenReturn("mapreduce").thenReturn("other").thenReturn("other"); + when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)) + .thenReturn("1.0.1").thenReturn("1.0.0").thenReturn("1.0.1"); + when(connection.getHeaderField( + SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)).thenReturn(replyHash); + ByteArrayInputStream in = new ByteArrayInputStream(new byte[0]); + when(connection.getInputStream()).thenReturn(in); + + for (int i = 0; i < 3; ++i) { + Fetcher underTest = new FakeFetcher(jobWithRetry, + id, ss, mm, r, metrics, except, key, connection); + underTest.copyFromHost(host); + } + + verify(connection, times(3)).addRequestProperty( + SecureShuffleUtils.HTTP_HEADER_URL_HASH, encHash); + + verify(allErrs, times(3)).increment(1); + verify(ss, times(3)).copyFailed(map1ID, host, false, false); + verify(ss, times(3)).copyFailed(map2ID, host, false, false); + + verify(ss, times(3)).putBackKnownMapOutput(any(MapHost.class), eq(map1ID)); + verify(ss, times(3)).putBackKnownMapOutput(any(MapHost.class), eq(map2ID)); + } @Test public void testCopyFromHostWait() throws Exception { @@ -301,6 +339,48 @@ public void 
testCopyFromHostCompressFailure() throws Exception { encHash); verify(ss, times(1)).copyFailed(map1ID, host, true, false); } + + @SuppressWarnings("unchecked") + @Test(timeout=10000) + public void testCopyFromHostWithRetry() throws Exception { + InMemoryMapOutput immo = mock(InMemoryMapOutput.class); + ss = mock(ShuffleSchedulerImpl.class); + Fetcher underTest = new FakeFetcher(jobWithRetry, + id, ss, mm, r, metrics, except, key, connection, true); + + String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key); + + when(connection.getResponseCode()).thenReturn(200); + when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH)) + .thenReturn(replyHash); + ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1); + ByteArrayOutputStream bout = new ByteArrayOutputStream(); + header.write(new DataOutputStream(bout)); + ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray()); + when(connection.getInputStream()).thenReturn(in); + when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME)) + .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME); + when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION)) + .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION); + when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt())) + .thenReturn(immo); + + final long retryTime = Time.monotonicNow(); + doAnswer(new Answer() { + public Void answer(InvocationOnMock ignore) throws IOException { + // Emulate host down for 3 seconds. 
+ if ((Time.monotonicNow() - retryTime) <= 3000) { + throw new java.lang.InternalError(); + } + return null; + } + }).when(immo).shuffle(any(MapHost.class), any(InputStream.class), anyLong(), + anyLong(), any(ShuffleClientMetrics.class), any(Reporter.class)); + + underTest.copyFromHost(host); + verify(ss, never()).copyFailed(any(TaskAttemptID.class),any(MapHost.class), + anyBoolean(), anyBoolean()); + } @Test public void testCopyFromHostExtraBytes() throws Exception { @@ -447,6 +527,9 @@ public Void answer(InvocationOnMock ignore) throws IOException { public static class FakeFetcher extends Fetcher { + // If connection need to be reopen. + private boolean renewConnection = false; + public FakeFetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl scheduler, MergeManagerImpl merger, Reporter reporter, ShuffleClientMetrics metrics, @@ -456,6 +539,17 @@ public FakeFetcher(JobConf job, TaskAttemptID reduceId, exceptionReporter, jobTokenSecret); this.connection = connection; } + + public FakeFetcher(JobConf job, TaskAttemptID reduceId, + ShuffleSchedulerImpl scheduler, MergeManagerImpl merger, + Reporter reporter, ShuffleClientMetrics metrics, + ExceptionReporter exceptionReporter, SecretKey jobTokenSecret, + HttpURLConnection connection, boolean renewConnection) { + super(job, reduceId, scheduler, merger, reporter, metrics, + exceptionReporter, jobTokenSecret); + this.connection = connection; + this.renewConnection = renewConnection; + } public FakeFetcher(JobConf job, TaskAttemptID reduceId, ShuffleSchedulerImpl scheduler, MergeManagerImpl merger, @@ -469,7 +563,7 @@ public FakeFetcher(JobConf job, TaskAttemptID reduceId, @Override protected void openConnection(URL url) throws IOException { - if (null == connection) { + if (null == connection || renewConnection) { super.openConnection(url); } // already 'opened' the mocked connection diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/client/HSAdmin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/client/HSAdmin.java index be6ca1318199a..000ea54618b28 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/client/HSAdmin.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/client/HSAdmin.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.v2.hs.HSProxies; import org.apache.hadoop.mapreduce.v2.hs.protocol.HSAdminRefreshProtocol; import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; @@ -41,7 +42,7 @@ public HSAdmin() { super(); } - public HSAdmin(Configuration conf) { + public HSAdmin(JobConf conf) { super(conf); } @@ -331,7 +332,8 @@ public int run(String[] args) throws Exception { } public static void main(String[] args) throws Exception { - int result = ToolRunner.run(new HSAdmin(), args); + JobConf conf = new JobConf(); + int result = ToolRunner.run(new HSAdmin(conf), args); System.exit(result); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/server/TestHSAdminServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/server/TestHSAdminServer.java index 277a1953c8e81..2c239ec2bb82c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/server/TestHSAdminServer.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/server/TestHSAdminServer.java @@ -28,6 +28,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.v2.hs.JobHistory; import org.apache.hadoop.mapreduce.v2.hs.client.HSAdmin; import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; @@ -48,7 +49,7 @@ public class TestHSAdminServer { private HSAdminServer hsAdminServer = null; private HSAdmin hsAdminClient = null; - Configuration conf = null; + JobConf conf = null; private static long groupRefreshTimeoutSec = 1; JobHistory jobHistoryService = null; AggregatedLogDeletionService alds = null; @@ -81,7 +82,7 @@ public void cacheGroupsAdd(List groups) throws IOException { @Before public void init() throws HadoopIllegalArgumentException, IOException { - conf = new Configuration(); + conf = new JobConf(); conf.set(JHAdminConfig.JHS_ADMIN_ADDRESS, "0.0.0.0:0"); conf.setClass("hadoop.security.group.mapping", MockUnixGroupsMapping.class, GroupMappingServiceProvider.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml index e34518c6094cf..bd9c9c57241d2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001_conf.xml @@ -152,7 +152,6 @@ mapreduce.map.speculativefalse mapreduce.job.acl-view-job mapreduce.map.output.key.classorg.apache.hadoop.io.IntWritable -yarn.ipc.serializer.typeprotocolbuffers 
mapreduce.job.end-notification.max.retry.interval5 ftp.blocksize67108864 mapreduce.tasktracker.http.threads40 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index c3eee2cfdcf2f..686fa0c70c599 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -69,6 +69,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -150,6 +151,8 @@ private MRClientProtocol getProxy() throws IOException { ApplicationReport application = null; try { application = rm.getApplicationReport(appId); + } catch (ApplicationNotFoundException e) { + application = null; } catch (YarnException e2) { throw new IOException(e2); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java index 5639e5d56fc2a..7d6b2f3081947 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java @@ -31,8 +31,6 @@ import java.util.Arrays; import java.util.Collection; -import org.junit.Assert; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobStatus; @@ -56,8 +54,10 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.util.Records; +import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -488,7 +488,9 @@ private ApplicationReport getRunningApplicationReport(String host, int port) { private ResourceMgrDelegate getRMDelegate() throws IOException { ResourceMgrDelegate rm = mock(ResourceMgrDelegate.class); try { - when(rm.getApplicationReport(jobId.getAppId())).thenReturn(null); + ApplicationId appId = jobId.getAppId(); + when(rm.getApplicationReport(appId)). 
+ thenThrow(new ApplicationNotFoundException(appId + " not found")); } catch (YarnException e) { throw new IOException(e); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java index b92400dbf6ace..7a2c03b1b0be4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestBinaryTokenFile.java @@ -150,30 +150,15 @@ private void setupBinaryTokenFile(Job job) { // Credentials in the job will not have delegation tokens // because security is disabled. Fetch delegation tokens // and store in binary token file. - try { - Credentials cred1 = new Credentials(); - Credentials cred2 = new Credentials(); - TokenCache.obtainTokensForNamenodesInternal(cred1, new Path[] { p1 }, - job.getConfiguration()); - for (Token t : cred1.getAllTokens()) { - cred2.addToken(new Text(DELEGATION_TOKEN_KEY), t); - } - DataOutputStream os = new DataOutputStream(new FileOutputStream( - binaryTokenFileName.toString())); - try { - cred2.writeTokenStorageToStream(os); - } finally { - os.close(); - } - job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, - binaryTokenFileName.toString()); - // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY key now gets deleted from config, - // so it's not accessible in the job's config. 
So, we use another key to pass the file name into the job configuration: - job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME, - binaryTokenFileName.toString()); - } catch (IOException e) { - Assert.fail("Exception " + e); - } + createBinaryTokenFile(job.getConfiguration()); + job.getConfiguration().set(MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY, + binaryTokenFileName.toString()); + // NB: the MRJobConfig.MAPREDUCE_JOB_CREDENTIALS_BINARY + // key now gets deleted from config, + // so it's not accessible in the job's config. So, + // we use another key to pass the file name into the job configuration: + job.getConfiguration().set(KEY_SECURITY_TOKEN_FILE_NAME, + binaryTokenFileName.toString()); } } @@ -225,7 +210,29 @@ public static void tearDown() throws Exception { dfsCluster = null; } } - + + private static void createBinaryTokenFile(Configuration conf) { + // Fetch delegation tokens and store in binary token file. + try { + Credentials cred1 = new Credentials(); + Credentials cred2 = new Credentials(); + TokenCache.obtainTokensForNamenodesInternal(cred1, new Path[] { p1 }, + conf); + for (Token t : cred1.getAllTokens()) { + cred2.addToken(new Text(DELEGATION_TOKEN_KEY), t); + } + DataOutputStream os = new DataOutputStream(new FileOutputStream( + binaryTokenFileName.toString())); + try { + cred2.writeTokenStorageToStream(os); + } finally { + os.close(); + } + } catch (IOException e) { + Assert.fail("Exception " + e); + } + } + /** * run a distributed job and verify that TokenCache is available * @throws IOException @@ -252,4 +259,33 @@ public void testBinaryTokenFile() throws IOException { } assertEquals("dist job res is not 0:", 0, res); } + + /** + * run a distributed job with -tokenCacheFile option parameter and + * verify that no exception happens. 
+ * @throws IOException + */ + @Test + public void testTokenCacheFile() throws IOException { + Configuration conf = mrCluster.getConfig(); + createBinaryTokenFile(conf); + // provide namenodes names for the job to get the delegation tokens for + final String nnUri = dfsCluster.getURI(0).toString(); + conf.set(MRJobConfig.JOB_NAMENODES, nnUri + "," + nnUri); + + // using argument to pass the file name + final String[] args = { + "-tokenCacheFile", binaryTokenFileName.toString(), + "-m", "1", "-r", "1", "-mt", "1", "-rt", "1" + }; + int res = -1; + try { + res = ToolRunner.run(conf, new SleepJob(), args); + } catch (Exception e) { + System.out.println("Job failed with " + e.getLocalizedMessage()); + e.printStackTrace(System.out); + fail("Job failed"); + } + assertEquals("dist job res is not 0:", 0, res); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestNonExistentJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestNonExistentJob.java index d4f5f8433684b..b6947f3fc4895 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestNonExistentJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestNonExistentJob.java @@ -90,13 +90,8 @@ protected void tearDown() throws Exception { } public void testGetInvalidJob() throws Exception { - try { - RunningJob runJob = new JobClient(getJobConf()).getJob(JobID.forName("job_0_0")); - fail("Exception is expected to thrown ahead!"); - } catch (Exception e) { - assertTrue(e instanceof IOException); - assertTrue(e.getMessage().contains("ApplicationNotFoundException")); - } + RunningJob runJob = new JobClient(getJobConf()).getJob(JobID.forName("job_0_0")); + 
assertNull(runJob); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java index 32199e554105f..e89a919e0503f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestUberAM.java @@ -191,11 +191,4 @@ public void testSleepJobWithSecurityOn() throws IOException, InterruptedException, ClassNotFoundException { super.testSleepJobWithSecurityOn(); } - - // Add a test for distcache when uber mode is enabled. TODO - @Override - @Test - public void testDistributedCache() throws Exception { - // - } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index a121fafc33ff9..3bc1a570a14d8 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -864,6 +864,16 @@ 2.9.1 + + org.apache.curator + curator-recipes + 2.6.0 + + + org.apache.curator + curator-client + 2.6.0 + org.apache.curator curator-framework diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index a42aff0a382de..991447f173247 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -93,6 +93,7 @@ + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index 577711f4ce0d2..dae957ee30ebb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -1095,7 +1095,7 
@@ public boolean rename(Path src, Path dst) throws IOException { if (dstKey.startsWith(srcKey + PATH_DELIMITER)) { if (LOG.isDebugEnabled()) { - LOG.debug("Renaming directory to a itself is disallowed. src=" + src + LOG.debug("Renaming directory to itself is disallowed. src=" + src + " dest=" + dst); } return false; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java index 4620cfc5a2954..8ac590bfb6bf8 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java @@ -681,7 +681,7 @@ public void run() { } catch (IOException e) { LOG.warn("Failure killing " + job.getJobName(), e); } catch (Exception e) { - LOG.error("Unexcpected exception", e); + LOG.error("Unexpected exception", e); } } LOG.info("Done."); diff --git a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json index 230e4fd8245c4..59ae8d755a51c 100644 --- a/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json +++ b/hadoop-tools/hadoop-sls/src/main/data/2jobs2min-rumen-jh.json @@ -4716,7 +4716,6 @@ "dfs.journalnode.http-address" : "0.0.0.0:8480", "mapreduce.job.acl-view-job" : " ", "mapreduce.reduce.shuffle.retry-delay.max.ms" : "60000", - "yarn.ipc.serializer.type" : "protocolbuffers", "mapreduce.job.end-notification.max.retry.interval" : "5", "ftp.blocksize" : "67108864", "mapreduce.tasktracker.http.threads" : "80", @@ -4841,7 +4840,7 @@ "yarn.ipc.rpc.class" : "org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC", "mapreduce.job.name" : "TeraGen", "kfs.blocksize" : "67108864", - "yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs" : "86400", + "yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs" : "86400", 
"mapreduce.job.ubertask.maxmaps" : "9", "yarn.scheduler.maximum-allocation-mb" : "8192", "yarn.nodemanager.heartbeat.interval-ms" : "1000", @@ -9830,7 +9829,6 @@ "dfs.journalnode.http-address" : "0.0.0.0:8480", "mapreduce.job.acl-view-job" : " ", "mapreduce.reduce.shuffle.retry-delay.max.ms" : "60000", - "yarn.ipc.serializer.type" : "protocolbuffers", "mapreduce.job.end-notification.max.retry.interval" : "5", "ftp.blocksize" : "67108864", "mapreduce.tasktracker.http.threads" : "80", @@ -9955,7 +9953,7 @@ "yarn.ipc.rpc.class" : "org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC", "mapreduce.job.name" : "TeraGen", "kfs.blocksize" : "67108864", - "yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs" : "86400", + "yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs" : "86400", "mapreduce.job.ubertask.maxmaps" : "9", "yarn.scheduler.maximum-allocation-mb" : "8192", "yarn.nodemanager.heartbeat.interval-ms" : "1000", diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index c64c0b63b0fbc..7ff5641bf86d5 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -1016,7 +1016,7 @@ public int submitAndMonitorJob() throws IOException { if (background_) { LOG.info("Job is running in background."); } else if (!jc_.monitorAndPrintJob(jobConf_, running_)) { - LOG.error("Job not Successful!"); + LOG.error("Job not successful!"); return 1; } LOG.info("Output directory: " + output_); diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 51fe3cce10883..a2d0536b09482 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -235,6 +235,18 @@ Release 2.6.0 - UNRELEASED YARN-2557. 
Add a parameter "attempt_Failures_Validity_Interval" into DistributedShell (xgong) + YARN-2001. Added a time threshold for RM to wait before starting container + allocations after restart/failover. (Jian He via vinodkv) + + YARN-1372. Ensure all completed containers are reported to the AMs across + RM restart. (Anubhav Dhoot via jianhe) + + YARN-2539. FairScheduler: Set the default value for maxAMShare to 0.5. + (Wei Yan via kasha) + + YARN-1959. Fix headroom calculation in FairScheduler. + (Anubhav Dhoot via kasha) + OPTIMIZATIONS BUG FIXES @@ -377,6 +389,48 @@ Release 2.6.0 - UNRELEASED YARN-2529. Generic history service RPC interface doesn't work when service authorization is enabled. (Zhijie Shen via jianhe) + YARN-2558. Updated ContainerTokenIdentifier#read/write to use + ContainerId#getContainerId. (Tsuyoshi OZAWA via jianhe) + + YARN-2559. Fixed NPE in SystemMetricsPublisher when retrieving + FinalApplicationStatus. (Zhijie Shen via jianhe) + + YARN-1779. Fixed AMRMClient to handle AMRMTokens correctly across + ResourceManager work-preserving-restart or failover. (Jian He via vinodkv) + + YARN-2363. Submitted applications occasionally lack a tracking URL (jlowe) + + YARN-2561. MR job client cannot reconnect to AM after NM restart. (Junping + Du via jlowe) + + YARN-2563. Fixed YarnClient to call getTimeLineDelegationToken only if the + Token is not present. (Zhijie Shen via jianhe) + + YARN-2568. Fixed the potential test failures due to race conditions when RM + work-preserving recovery is enabled. (Jian He via zjshen) + + YARN-2565. Fixed RM to not use FileSystemApplicationHistoryStore unless + explicitly set. (Zhijie Shen via jianhe) + + YARN-2460. Remove obsolete entries from yarn-default.xml (Ray Chiang via + aw) + + YARN-2452. TestRMApplicationHistoryWriter fails with FairScheduler. + (Zhihai Xu via kasha) + + YARN-2453. TestProportionalCapacityPreemptionPolicy fails with + FairScheduler. (Zhihai Xu via kasha) + + YARN-2540. 
FairScheduler: Queue filters not working on scheduler page in + RM UI. (Ashwin Shankar via kasha) + + YARN-2584. TestContainerManagerSecurity fails on trunk. (Jian He via + junping_du) + + YARN-2252. Intermittent failure of + TestFairScheduler.testContinuousScheduling. + (Ratandeep Ratti and kasha via kasha) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 44a6fc3d345f2..acc4a055dfa70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -353,6 +353,11 @@ public class YarnConfiguration extends Configuration { public static final boolean DEFAULT_RM_WORK_PRESERVING_RECOVERY_ENABLED = false; + public static final String RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS = + RM_PREFIX + "work-preserving-recovery.scheduling-wait-ms"; + public static final long DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS = + 10000; + /** Zookeeper interaction configs */ public static final String RM_ZK_PREFIX = RM_PREFIX + "zk-"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java index 08cacee2b2455..3aba01a0efec5 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java @@ -53,7 +53,7 @@ public class TestUnmanagedAMLauncher { .getLog(TestUnmanagedAMLauncher.class); protected static MiniYARNCluster yarnCluster = null; - protected static Configuration conf = new Configuration(); + protected static Configuration conf = new YarnConfiguration(); @BeforeClass public static void setup() throws InterruptedException, IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index e36d7ade78c22..88b2f456a898c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -756,6 +756,7 @@ private void updateAMRMToken(Token token) throws IOException { new org.apache.hadoop.security.token.Token(token .getIdentifier().array(), token.getPassword().array(), new Text( token.getKind()), new Text(token.getService())); + amrmToken.setService(ClientRMProxy.getAMRMTokenService(getConfig())); UserGroupInformation currentUGI = UserGroupInformation.getCurrentUser(); if (UserGroupInformation.isSecurityEnabled()) { currentUGI = UserGroupInformation.getLoginUser(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index f1a3b6eeceaf3..def6da55ea8a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -110,7 +110,8 @@ public class YarnClientImpl extends YarnClient { private AHSClient historyClient; private boolean historyServiceEnabled; protected TimelineClient timelineClient; - protected Text timelineService; + @VisibleForTesting + Text timelineService; protected boolean timelineServiceEnabled; private static final String ROOT = "root"; @@ -272,12 +273,6 @@ public YarnClientApplication createApplication() private void addTimelineDelegationToken( ContainerLaunchContext clc) throws YarnException, IOException { - org.apache.hadoop.security.token.Token timelineDelegationToken = - timelineClient.getDelegationToken( - UserGroupInformation.getCurrentUser().getUserName()); - if (timelineDelegationToken == null) { - return; - } Credentials credentials = new Credentials(); DataInputByteBuffer dibb = new DataInputByteBuffer(); ByteBuffer tokens = clc.getTokens(); @@ -290,11 +285,15 @@ private void addTimelineDelegationToken( // one more for (org.apache.hadoop.security.token.Token token : credentials .getAllTokens()) { - TokenIdentifier tokenIdentifier = token.decodeIdentifier(); - if (tokenIdentifier instanceof TimelineDelegationTokenIdentifier) { + if (token.getKind().equals(TimelineDelegationTokenIdentifier.KIND_NAME)) { return; } } + org.apache.hadoop.security.token.Token + timelineDelegationToken = getTimelineDelegationToken(); + if (timelineDelegationToken == null) { + return; + } credentials.addToken(timelineService, timelineDelegationToken); if (LOG.isDebugEnabled()) { LOG.debug("Add timline delegation token into credentials: " @@ -306,6 
+305,13 @@ private void addTimelineDelegationToken( clc.setTokens(tokens); } + @VisibleForTesting + org.apache.hadoop.security.token.Token + getTimelineDelegationToken() throws IOException, YarnException { + return timelineClient.getDelegationToken( + UserGroupInformation.getCurrentUser().getUserName()); + } + @Private @VisibleForTesting protected boolean isSecurityEnabled() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java index 0b42ac3c6b92d..5b12940ce597c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java @@ -57,7 +57,7 @@ public void initiate() throws Exception { Token appToken = this.cluster.getResourceManager().getRMContext() .getAMRMTokenSecretManager().createAndGetAMRMToken(attemptId); - appToken.setService(new Text("appToken service")); + appToken.setService(ClientRMProxy.getAMRMTokenService(conf)); UserGroupInformation.setLoginUser(UserGroupInformation .createRemoteUser(UserGroupInformation.getCurrentUser() .getUserName())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index 38dbf79da996b..a434e35a9f230 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -70,6 +70,7 @@ 
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.ClientRMProxy; import org.apache.hadoop.yarn.client.api.AMRMClient; import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest; import org.apache.hadoop.yarn.client.api.NMTokenCache; @@ -196,6 +197,7 @@ Collections. emptyMap(), // of testing. UserGroupInformation.setLoginUser(UserGroupInformation .createRemoteUser(UserGroupInformation.getCurrentUser().getUserName())); + appAttempt.getAMRMToken().setService(ClientRMProxy.getAMRMTokenService(conf)); UserGroupInformation.getCurrentUser().addToken(appAttempt.getAMRMToken()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java index dfad6d6c6623b..ce3086f57020c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java @@ -84,6 +84,7 @@ public static void setup() throws Exception { conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true); + conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0); } // Test does major 6 steps verification. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java index 8259893af37aa..3c1b1c1990848 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java @@ -20,6 +20,8 @@ import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -39,6 +41,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataInputByteBuffer; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -745,10 +748,13 @@ public void testAutomaticTimelineDelegationTokenLoading() Configuration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); SecurityUtil.setAuthenticationMethod(AuthenticationMethod.KERBEROS, conf); + TimelineDelegationTokenIdentifier timelineDT = + new TimelineDelegationTokenIdentifier(); final Token dToken = - new Token(); + new Token( + timelineDT.getBytes(), new byte[0], timelineDT.getKind(), new Text()); // crate a mock client - YarnClientImpl client = new YarnClientImpl() { + YarnClientImpl client = spy(new YarnClientImpl() { @Override protected void serviceInit(Configuration conf) throws Exception { if (getConfig().getBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, @@ -784,34 +790,48 @@ public 
ApplicationReport getApplicationReport(ApplicationId appId) { public boolean isSecurityEnabled() { return true; } - }; + }); client.init(conf); client.start(); - ApplicationSubmissionContext context = - mock(ApplicationSubmissionContext.class); - ApplicationId applicationId = ApplicationId.newInstance(0, 1); - when(context.getApplicationId()).thenReturn(applicationId); - DataOutputBuffer dob = new DataOutputBuffer(); - Credentials credentials = new Credentials(); - credentials.writeTokenStorageToStream(dob); - ByteBuffer tokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); - ContainerLaunchContext clc = ContainerLaunchContext.newInstance( - null, null, null, null, tokens, null); - when(context.getAMContainerSpec()).thenReturn(clc); - client.submitApplication(context); - // Check whether token is added or not - credentials = new Credentials(); - DataInputByteBuffer dibb = new DataInputByteBuffer(); - tokens = clc.getTokens(); - if (tokens != null) { - dibb.reset(tokens); - credentials.readTokenStorageStream(dibb); - tokens.rewind(); + try { + // when i == 0, timeline DT already exists, no need to get one more + // when i == 1, timeline DT doesn't exist, need to get one more + for (int i = 0; i < 2; ++i) { + ApplicationSubmissionContext context = + mock(ApplicationSubmissionContext.class); + ApplicationId applicationId = ApplicationId.newInstance(0, i + 1); + when(context.getApplicationId()).thenReturn(applicationId); + DataOutputBuffer dob = new DataOutputBuffer(); + Credentials credentials = new Credentials(); + if (i == 0) { + credentials.addToken(client.timelineService, dToken); + } + credentials.writeTokenStorageToStream(dob); + ByteBuffer tokens = ByteBuffer.wrap(dob.getData(), 0, dob.getLength()); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + null, null, null, null, tokens, null); + when(context.getAMContainerSpec()).thenReturn(clc); + client.submitApplication(context); + if (i == 0) { + // GetTimelineDelegationToken shouldn't be 
called + verify(client, never()).getTimelineDelegationToken(); + } + // In either way, token should be there + credentials = new Credentials(); + DataInputByteBuffer dibb = new DataInputByteBuffer(); + tokens = clc.getTokens(); + if (tokens != null) { + dibb.reset(tokens); + credentials.readTokenStorageStream(dibb); + tokens.rewind(); + } + Collection> dTokens = + credentials.getAllTokens(); + Assert.assertEquals(1, dTokens.size()); + Assert.assertEquals(dToken, dTokens.iterator().next()); + } + } finally { + client.stop(); } - Collection> dTokens = - credentials.getAllTokens(); - Assert.assertEquals(1, dTokens.size()); - Assert.assertEquals(dToken, dTokens.iterator().next()); - client.stop(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java index 3434755274e45..b29263edcd820 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java @@ -22,11 +22,12 @@ import java.net.InetSocketAddress; import java.util.ArrayList; -import com.google.common.base.Joiner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.SecurityUtil; @@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; +import 
com.google.common.base.Joiner; import com.google.common.base.Preconditions; @InterfaceAudience.Public @@ -70,23 +72,17 @@ public static T createRMProxy(final Configuration configuration, return createRMProxy(configuration, protocol, INSTANCE); } - private static void setupTokens(InetSocketAddress resourceManagerAddress) + private static void setAMRMTokenService(final Configuration conf) throws IOException { - // It is assumed for now that the only AMRMToken in AM's UGI is for this - // cluster/RM. TODO: Fix later when we have some kind of cluster-ID as - // default service-address, see YARN-1779. for (Token token : UserGroupInformation .getCurrentUser().getTokens()) { if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { - // This token needs to be directly provided to the AMs, so set the - // appropriate service-name. We'll need more infrastructure when we - // need to set it in HA case. - SecurityUtil.setTokenService(token, resourceManagerAddress); + token.setService(getAMRMTokenService(conf)); } } } - @InterfaceAudience.Private + @Private @Override protected InetSocketAddress getRMAddress(YarnConfiguration conf, Class protocol) throws IOException { @@ -100,12 +96,10 @@ protected InetSocketAddress getRMAddress(YarnConfiguration conf, YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_PORT); } else if (protocol == ApplicationMasterProtocol.class) { - InetSocketAddress serviceAddr = - conf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, - YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, - YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); - setupTokens(serviceAddr); - return serviceAddr; + setAMRMTokenService(conf); + return conf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); } else { String message = "Unsupported protocol found when creating the proxy " + "connection to ResourceManager: " + @@ -115,7 +109,7 @@ protected 
InetSocketAddress getRMAddress(YarnConfiguration conf, } } - @InterfaceAudience.Private + @Private @Override protected void checkAllowedProtocols(Class protocol) { Preconditions.checkArgument( @@ -132,8 +126,23 @@ protected void checkAllowedProtocols(Class protocol) { * RMDelegationToken for * @return - Service name for RMDelegationToken */ - @InterfaceStability.Unstable + @Unstable public static Text getRMDelegationTokenService(Configuration conf) { + return getTokenService(conf, YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_PORT); + } + + @Unstable + public static Text getAMRMTokenService(Configuration conf) { + return getTokenService(conf, YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); + } + + @Unstable + public static Text getTokenService(Configuration conf, String address, + String defaultAddr, int defaultPort) { if (HAUtil.isHAEnabled(conf)) { // Build a list of service addresses to form the service name ArrayList services = new ArrayList(); @@ -142,17 +151,14 @@ public static Text getRMDelegationTokenService(Configuration conf) { // Set RM_ID to get the corresponding RM_ADDRESS yarnConf.set(YarnConfiguration.RM_HA_ID, rmId); services.add(SecurityUtil.buildTokenService( - yarnConf.getSocketAddr(YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_PORT)).toString()); + yarnConf.getSocketAddr(address, defaultAddr, defaultPort)) + .toString()); } return new Text(Joiner.on(',').join(services)); } // Non-HA case - no need to set RM_ID - return SecurityUtil.buildTokenService( - conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_PORT)); + return SecurityUtil.buildTokenService(conf.getSocketAddr(address, + defaultAddr, defaultPort)); } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java index 469383963ec8b..be3701d704898 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java @@ -48,11 +48,18 @@ public Token selectToken(Text service, LOG.debug("Token kind is " + token.getKind().toString() + " and the token's service name is " + token.getService()); if (AMRMTokenIdentifier.KIND_NAME.equals(token.getKind()) - && service.equals(token.getService())) { + && checkService(service, token)) { return (Token) token; } } return null; } + private boolean checkService(Text service, + Token token) { + if (service == null || token.getService() == null) { + return false; + } + return token.getService().toString().contains(service.toString()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java index 8b8177a79774f..ca847e0726ef8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java @@ -128,7 +128,7 @@ public void write(DataOutput out) throws IOException { out.writeLong(applicationId.getClusterTimestamp()); out.writeInt(applicationId.getId()); out.writeInt(applicationAttemptId.getAttemptId()); - out.writeInt(this.containerId.getId()); + out.writeLong(this.containerId.getContainerId()); 
out.writeUTF(this.nmHostAddr); out.writeUTF(this.appSubmitter); out.writeInt(this.resource.getMemory()); @@ -147,7 +147,7 @@ public void readFields(DataInput in) throws IOException { ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, in.readInt()); this.containerId = - ContainerId.newInstance(applicationAttemptId, in.readInt()); + ContainerId.newInstance(applicationAttemptId, in.readLong()); this.nmHostAddr = in.readUTF(); this.appSubmitter = in.readUTF(); int memory = in.readInt(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java index 077c962883190..a205bd1f57443 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java @@ -18,12 +18,12 @@ package org.apache.hadoop.yarn.util.resource; -import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.util.Records; -@Private +@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) @Unstable public class Resources { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 3a7e94ae8c8da..e642d051a8670 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -30,22 +30,11 @@ yarn.ipc.client.factory.class - - 
Type of serialization to use. - yarn.ipc.serializer.type - protocolbuffers - - Factory to create server IPC classes. yarn.ipc.server.factory.class - - Factory to create IPC exceptions. - yarn.ipc.exception.factory.class - - Factory to create serializeable records. yarn.ipc.record.factory.class @@ -162,12 +151,6 @@ 1 - - How often should the RM check that the AM is still alive. - yarn.resourcemanager.amliveliness-monitor.interval-ms - 1000 - - Maximum time to wait to establish connection to ResourceManager. @@ -220,12 +203,6 @@ 600000 - - How often to check that node managers are still alive. - yarn.resourcemanager.nm.liveness-monitor.interval-ms - 1000 - - Path to file with nodes to include. yarn.resourcemanager.nodes.include-path @@ -297,6 +274,16 @@ false + + Set the amount of time RM waits before allocating new + containers on work-preserving-recovery. Such wait period gives RM a chance + to settle down resyncing with NMs in the cluster on recovery, before assigning + new containers to applications. + + yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms + 10000 + + The class to use as the persistent store. @@ -570,7 +557,7 @@ Interval for the roll over for the master key used to generate application tokens - yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs + yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs 86400 @@ -1080,20 +1067,6 @@ 2000 - - Max time, in seconds, to wait to establish a connection to RM when NM starts. - The NM will shutdown if it cannot connect to RM within the specified max time period. - If the value is set as -1, then NM will retry forever. - yarn.nodemanager.resourcemanager.connect.wait.secs - 900 - - - - Time interval, in seconds, between each NM attempt to connect to RM. - yarn.nodemanager.resourcemanager.connect.retry_interval.secs - 30 - - The minimum allowed version of a resourcemanager that a nodemanager will connect to. 
The valid values are NONE (no version checking), EqualToNM (the resourcemanager's version is diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java index 1a252abf5b3ea..700a37ff31b02 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java @@ -56,4 +56,34 @@ public void testGetRMDelegationTokenService() { service.contains(defaultRMAddress)); } } + + @Test + public void testGetAMRMTokenService() { + String defaultRMAddress = YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS; + YarnConfiguration conf = new YarnConfiguration(); + + // HA is not enabled + Text tokenService = ClientRMProxy.getAMRMTokenService(conf); + String[] services = tokenService.toString().split(","); + assertEquals(1, services.length); + for (String service : services) { + assertTrue("Incorrect token service name", + service.contains(defaultRMAddress)); + } + + // HA is enabled + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2"); + conf.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, "rm1"), + "0.0.0.0"); + conf.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, "rm2"), + "0.0.0.0"); + tokenService = ClientRMProxy.getAMRMTokenService(conf); + services = tokenService.toString().split(","); + assertEquals(2, services.length); + for (String service : services) { + assertTrue("Incorrect token service name", + service.contains(defaultRMAddress)); + } + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java index 204e118c7abd0..f52ab07cf4503 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java @@ -182,7 +182,9 @@ private ApplicationHistoryManager createApplicationHistoryManager( // APPLICATION_HISTORY_STORE is neither null nor empty, it means that the // user has enabled it explicitly. 
if (conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE) == null || - conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE).length() == 0) { + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE).length() == 0 || + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE).equals( + NullApplicationHistoryStore.class.getName())) { return new ApplicationHistoryManagerOnTimelineStore( timelineDataManager, aclsManager); } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 38dfa582801ce..9887acccb179b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -30,6 +30,7 @@ public interface NodeHeartbeatResponse { NodeAction getNodeAction(); List getContainersToCleanup(); + List getFinishedContainersPulledByAM(); List getApplicationsToCleanup(); @@ -43,6 +44,10 @@ public interface NodeHeartbeatResponse { void setNMTokenMasterKey(MasterKey secretKey); void addAllContainersToCleanup(List containers); + + // This tells NM to remove finished containers only after the AM + // has actually received it in a previous allocate response + void addFinishedContainersPulledByAM(List containers); void addAllApplicationsToCleanup(List applications); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index 775f95afe98b3..e9296f4323463 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -46,6 +46,7 @@ public class NodeHeartbeatResponsePBImpl extends ProtoBase containersToCleanup = null; + private List finishedContainersPulledByAM = null; private List applicationsToCleanup = null; private MasterKey containerTokenMasterKey = null; private MasterKey nmTokenMasterKey = null; @@ -73,6 +74,9 @@ private void mergeLocalToBuilder() { if (this.applicationsToCleanup != null) { addApplicationsToCleanupToProto(); } + if (this.finishedContainersPulledByAM != null) { + addFinishedContainersPulledByAMToProto(); + } if (this.containerTokenMasterKey != null) { builder.setContainerTokenMasterKey( convertToProtoFormat(this.containerTokenMasterKey)); @@ -199,6 +203,12 @@ public List getContainersToCleanup() { return this.containersToCleanup; } + @Override + public List getFinishedContainersPulledByAM() { + initFinishedContainersPulledByAM(); + return this.finishedContainersPulledByAM; + } + private void initContainersToCleanup() { if (this.containersToCleanup != null) { return; @@ -212,6 +222,19 @@ private void initContainersToCleanup() { } } + private void initFinishedContainersPulledByAM() { + if (this.finishedContainersPulledByAM != null) { + return; + } + NodeHeartbeatResponseProtoOrBuilder p = viaProto ? 
proto : builder; + List list = p.getFinishedContainersPulledByAmList(); + this.finishedContainersPulledByAM = new ArrayList(); + + for (ContainerIdProto c : list) { + this.finishedContainersPulledByAM.add(convertFromProtoFormat(c)); + } + } + @Override public void addAllContainersToCleanup( final List containersToCleanup) { @@ -221,6 +244,15 @@ public void addAllContainersToCleanup( this.containersToCleanup.addAll(containersToCleanup); } + @Override + public void addFinishedContainersPulledByAM( + final List finishedContainersPulledByAM) { + if (finishedContainersPulledByAM == null) + return; + initFinishedContainersPulledByAM(); + this.finishedContainersPulledByAM.addAll(finishedContainersPulledByAM); + } + private void addContainersToCleanupToProto() { maybeInitBuilder(); builder.clearContainersToCleanup(); @@ -256,6 +288,41 @@ public void remove() { builder.addAllContainersToCleanup(iterable); } + private void addFinishedContainersPulledByAMToProto() { + maybeInitBuilder(); + builder.clearFinishedContainersPulledByAm(); + if (finishedContainersPulledByAM == null) + return; + Iterable iterable = new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + + Iterator iter = finishedContainersPulledByAM.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public ContainerIdProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + + } + }; + + } + }; + builder.addAllFinishedContainersPulledByAm(iterable); + } + @Override public List getApplicationsToCleanup() { initApplicationsToCleanup(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto index 29cd64e782d96..600f54d647236 
100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto @@ -58,6 +58,7 @@ message NodeHeartbeatResponseProto { repeated ApplicationIdProto applications_to_cleanup = 6; optional int64 nextHeartBeatInterval = 7; optional string diagnostics_message = 8; + repeated ContainerIdProto finished_containers_pulled_by_am = 9; } message NMContainerStatusProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index a479be29f739b..43770c188ca60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -311,7 +311,7 @@ public void run() { public static class NMContext implements Context { private NodeId nodeId = null; - private final ConcurrentMap applications = + protected final ConcurrentMap applications = new ConcurrentHashMap(); protected final ConcurrentMap containers = new ConcurrentSkipListMap(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index b52b0fbf6e9d9..b4dcf1f2d8749 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -104,11 +104,6 @@ public class NodeStatusUpdaterImpl extends AbstractService implements // Duration for which to track recently stopped container. private long durationToTrackStoppedContainers; - // This is used to track the current completed containers when nodeheartBeat - // is called. These completed containers will be removed from NM context after - // nodeHeartBeat succeeds and the response from the nodeHeartBeat is - // processed. - private final Set previousCompletedContainers; private final NodeHealthCheckerService healthChecker; private final NodeManagerMetrics metrics; @@ -125,7 +120,6 @@ public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, this.metrics = metrics; this.recentlyStoppedContainers = new LinkedHashMap(); - this.previousCompletedContainers = new HashSet(); } @Override @@ -331,7 +325,7 @@ private List createKeepAliveApplicationList() { return appList; } - private NodeStatus getNodeStatus(int responseId) { + private NodeStatus getNodeStatus(int responseId) throws IOException { NodeHealthStatus nodeHealthStatus = this.context.getNodeHealthStatus(); nodeHealthStatus.setHealthReport(healthChecker.getHealthReport()); @@ -352,11 +346,18 @@ private NodeStatus getNodeStatus(int responseId) { // Iterate through the NMContext and clone and get all the containers' // statuses. If it's a completed container, add into the - // recentlyStoppedContainers and previousCompletedContainers collections. + // recentlyStoppedContainers collections. 
@VisibleForTesting - protected List getContainerStatuses() { + protected List getContainerStatuses() throws IOException { List containerStatuses = new ArrayList(); for (Container container : this.context.getContainers().values()) { + ContainerId containerId = container.getContainerId(); + ApplicationId applicationId = container.getContainerId() + .getApplicationAttemptId().getApplicationId(); + if (!this.context.getApplications().containsKey(applicationId)) { + context.getContainers().remove(containerId); + continue; + } org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = container.cloneAndGetContainerStatus(); containerStatuses.add(containerStatus); @@ -381,10 +382,17 @@ private List getRunningApplications() { } // These NMContainerStatus are sent on NM registration and used by YARN only. - private List getNMContainerStatuses() { + private List getNMContainerStatuses() throws IOException { List containerStatuses = new ArrayList(); for (Container container : this.context.getContainers().values()) { + ContainerId containerId = container.getContainerId(); + ApplicationId applicationId = container.getContainerId() + .getApplicationAttemptId().getApplicationId(); + if (!this.context.getApplications().containsKey(applicationId)) { + context.getContainers().remove(containerId); + continue; + } NMContainerStatus status = container.getNMContainerStatus(); containerStatuses.add(status); @@ -402,26 +410,30 @@ private List getNMContainerStatuses() { @Override public void addCompletedContainer(ContainerId containerId) { - synchronized (previousCompletedContainers) { - previousCompletedContainers.add(containerId); - } synchronized (recentlyStoppedContainers) { removeVeryOldStoppedContainersFromCache(); - recentlyStoppedContainers.put(containerId, - System.currentTimeMillis() + durationToTrackStoppedContainers); + if (!recentlyStoppedContainers.containsKey(containerId)) { + recentlyStoppedContainers.put(containerId, + System.currentTimeMillis() + 
durationToTrackStoppedContainers); + } } } - private void removeCompletedContainersFromContext() { - synchronized (previousCompletedContainers) { - if (!previousCompletedContainers.isEmpty()) { - for (ContainerId containerId : previousCompletedContainers) { - this.context.getContainers().remove(containerId); - } - LOG.info("Removed completed containers from NM context: " - + previousCompletedContainers); - previousCompletedContainers.clear(); - } + @VisibleForTesting + @Private + public void removeCompletedContainersFromContext( + List containerIds) throws IOException { + Set removedContainers = new HashSet(); + + // If the AM has pulled the completedContainer it can be removed + for (ContainerId containerId : containerIds) { + context.getContainers().remove(containerId); + removedContainers.add(containerId); + } + + if (!removedContainers.isEmpty()) { + LOG.info("Removed completed containers from NM context: " + + removedContainers); } } @@ -454,7 +466,7 @@ public boolean isContainerRecentlyStopped(ContainerId containerId) { return recentlyStoppedContainers.containsKey(containerId); } } - + @Override public void clearFinishedContainersFromCache() { synchronized (recentlyStoppedContainers) { @@ -472,11 +484,13 @@ public void removeVeryOldStoppedContainersFromCache() { while (i.hasNext()) { ContainerId cid = i.next(); if (recentlyStoppedContainers.get(cid) < currentTime) { - i.remove(); - try { - context.getNMStateStore().removeContainer(cid); - } catch (IOException e) { - LOG.error("Unable to remove container " + cid + " in store", e); + if (!context.getContainers().containsKey(cid)) { + i.remove(); + try { + context.getNMStateStore().removeContainer(cid); + } catch (IOException e) { + LOG.error("Unable to remove container " + cid + " in store", e); + } } } else { break; @@ -542,7 +556,9 @@ public void run() { // don't want to remove the completed containers before resync // because these completed containers will be reported back to RM // when NM re-registers with 
RM. - removeCompletedContainersFromContext(); + // Only remove the cleanedup containers that are acked + removeCompletedContainersFromContext(response + .getFinishedContainersPulledByAM()); lastHeartBeatID = response.getResponseId(); List containersToCleanup = response diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index acda2a9970c66..85bafb3dee585 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -247,6 +248,10 @@ public RegisterNodeManagerResponse registerNodeManager( // put the completed container into the context getNMContext().getContainers().put( testCompleteContainer.getContainerId(), container); + getNMContext().getApplications().put( + testCompleteContainer.getContainerId() + .getApplicationAttemptId().getApplicationId(), + mock(Application.class)); } else { // second register contains the completed 
container info. List statuses = @@ -382,9 +387,17 @@ protected void rebootNodeStatusUpdaterAndRegisterWithRM() { if (containersShouldBePreserved) { Assert.assertFalse(containers.isEmpty()); Assert.assertTrue(containers.containsKey(existingCid)); + Assert.assertEquals(ContainerState.RUNNING, + containers.get(existingCid) + .cloneAndGetContainerStatus().getState()); } else { - // ensure that containers are empty before restart nodeStatusUpdater - Assert.assertTrue(containers.isEmpty()); + // ensure that containers are empty or are completed before + // restart nodeStatusUpdater + if (!containers.isEmpty()) { + Assert.assertEquals(ContainerState.COMPLETE, + containers.get(existingCid) + .cloneAndGetContainerStatus().getState()); + } } super.rebootNodeStatusUpdaterAndRegisterWithRM(); } @@ -465,7 +478,12 @@ protected void rebootNodeStatusUpdaterAndRegisterWithRM() { try { // ensure that containers are empty before restart nodeStatusUpdater - Assert.assertTrue(containers.isEmpty()); + if (!containers.isEmpty()) { + for (Container container: containers.values()) { + Assert.assertEquals(ContainerState.COMPLETE, + container.cloneAndGetContainerStatus().getState()); + } + } super.rebootNodeStatusUpdaterAndRegisterWithRM(); // After this point new containers are free to be launched, except // containers from previous RM diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index f2a3a4a8c0c94..8fb51a310547f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -58,6 +58,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.client.RMProxy; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; @@ -180,7 +181,7 @@ private Map> getAppToContainerStatusMap( Map> map = new HashMap>(); for (ContainerStatus cs : containers) { - ApplicationId applicationId = + ApplicationId applicationId = cs.getContainerId().getApplicationAttemptId().getApplicationId(); List appContainers = map.get(applicationId); if (appContainers == null) { @@ -205,10 +206,10 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) nodeStatus.setResponseId(heartBeatID++); Map> appToContainers = getAppToContainerStatusMap(nodeStatus.getContainersStatuses()); - + ApplicationId appId1 = ApplicationId.newInstance(0, 1); ApplicationId appId2 = ApplicationId.newInstance(0, 2); - + if (heartBeatID == 1) { Assert.assertEquals(0, nodeStatus.getContainersStatuses().size()); @@ -419,7 +420,7 @@ protected void stopRMProxy() { } private class MyNodeManager extends NodeManager { - + private MyNodeStatusUpdater3 nodeStatusUpdater; @Override protected NodeStatusUpdater createNodeStatusUpdater(Context context, @@ -433,7 +434,7 @@ public MyNodeStatusUpdater3 getNodeStatusUpdater() { return this.nodeStatusUpdater; } } - + private class MyNodeManager2 extends NodeManager { public boolean isStopped = false; private NodeStatusUpdater nodeStatusUpdater; @@ -467,7 +468,7 @@ protected void serviceStop() throws Exception { syncBarrier.await(10000, TimeUnit.MILLISECONDS); } } - // + // private class MyResourceTracker2 implements ResourceTracker { public NodeAction 
heartBeatNodeAction = NodeAction.NORMAL; public NodeAction registerNodeAction = NodeAction.NORMAL; @@ -478,7 +479,7 @@ private class MyResourceTracker2 implements ResourceTracker { public RegisterNodeManagerResponse registerNodeManager( RegisterNodeManagerRequest request) throws YarnException, IOException { - + RegisterNodeManagerResponse response = recordFactory .newRecordInstance(RegisterNodeManagerResponse.class); response.setNodeAction(registerNodeAction ); @@ -493,7 +494,7 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnException, IOException { NodeStatus nodeStatus = request.getNodeStatus(); nodeStatus.setResponseId(heartBeatID++); - + NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils. newNodeHeartbeatResponse(heartBeatID, heartBeatNodeAction, null, null, null, null, 1000L); @@ -501,7 +502,7 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) return nhResponse; } } - + private class MyResourceTracker3 implements ResourceTracker { public NodeAction heartBeatNodeAction = NodeAction.NORMAL; public NodeAction registerNodeAction = NodeAction.NORMAL; @@ -513,7 +514,7 @@ private class MyResourceTracker3 implements ResourceTracker { MyResourceTracker3(Context context) { this.context = context; } - + @Override public RegisterNodeManagerResponse registerNodeManager( RegisterNodeManagerRequest request) throws YarnException, @@ -564,6 +565,14 @@ private class MyResourceTracker4 implements ResourceTracker { public NodeAction registerNodeAction = NodeAction.NORMAL; public NodeAction heartBeatNodeAction = NodeAction.NORMAL; private Context context; + private final ContainerStatus containerStatus2 = + createContainerStatus(2, ContainerState.RUNNING); + private final ContainerStatus containerStatus3 = + createContainerStatus(3, ContainerState.COMPLETE); + private final ContainerStatus containerStatus4 = + createContainerStatus(4, ContainerState.RUNNING); + private final ContainerStatus containerStatus5 = + 
createContainerStatus(5, ContainerState.COMPLETE); public MyResourceTracker4(Context context) { this.context = context; @@ -583,6 +592,8 @@ public RegisterNodeManagerResponse registerNodeManager( @Override public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnException, IOException { + List finishedContainersPulledByAM = new ArrayList + (); try { if (heartBeatID == 0) { Assert.assertEquals(request.getNodeStatus().getContainersStatuses() @@ -594,10 +605,6 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) Assert.assertEquals(statuses.size(), 2); Assert.assertEquals(context.getContainers().size(), 2); - ContainerStatus containerStatus2 = - createContainerStatus(2, ContainerState.RUNNING); - ContainerStatus containerStatus3 = - createContainerStatus(3, ContainerState.COMPLETE); boolean container2Exist = false, container3Exist = false; for (ContainerStatus status : statuses) { if (status.getContainerId().equals( @@ -619,23 +626,14 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) // nodeStatusUpdaterRunnable, otherwise nm just shuts down and the // test passes. 
throw new YarnRuntimeException("Lost the heartbeat response"); - } else if (heartBeatID == 2) { + } else if (heartBeatID == 2 || heartBeatID == 3) { List statuses = request.getNodeStatus().getContainersStatuses(); Assert.assertEquals(statuses.size(), 4); Assert.assertEquals(context.getContainers().size(), 4); - ContainerStatus containerStatus2 = - createContainerStatus(2, ContainerState.RUNNING); - ContainerStatus containerStatus3 = - createContainerStatus(3, ContainerState.COMPLETE); - ContainerStatus containerStatus4 = - createContainerStatus(4, ContainerState.RUNNING); - ContainerStatus containerStatus5 = - createContainerStatus(5, ContainerState.COMPLETE); - - boolean container2Exist = false, container3Exist = false, container4Exist = - false, container5Exist = false; + boolean container2Exist = false, container3Exist = false, + container4Exist = false, container5Exist = false; for (ContainerStatus status : statuses) { if (status.getContainerId().equals( containerStatus2.getContainerId())) { @@ -664,6 +662,24 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) } Assert.assertTrue(container2Exist && container3Exist && container4Exist && container5Exist); + + if (heartBeatID == 3) { + finishedContainersPulledByAM.add(containerStatus3.getContainerId()); + } + } else if (heartBeatID == 4) { + List statuses = + request.getNodeStatus().getContainersStatuses(); + Assert.assertEquals(statuses.size(), 3); + Assert.assertEquals(context.getContainers().size(), 3); + + boolean container3Exist = false; + for (ContainerStatus status : statuses) { + if (status.getContainerId().equals( + containerStatus3.getContainerId())) { + container3Exist = true; + } + } + Assert.assertFalse(container3Exist); } } catch (AssertionError error) { error.printStackTrace(); @@ -676,6 +692,7 @@ public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils.newNodeHeartbeatResponse(heartBeatID, 
heartBeatNodeAction, null, null, null, null, 1000L); + nhResponse.addFinishedContainersPulledByAM(finishedContainersPulledByAM); return nhResponse; } } @@ -686,7 +703,7 @@ private class MyResourceTracker5 implements ResourceTracker { public RegisterNodeManagerResponse registerNodeManager( RegisterNodeManagerRequest request) throws YarnException, IOException { - + RegisterNodeManagerResponse response = recordFactory .newRecordInstance(RegisterNodeManagerResponse.class); response.setNodeAction(registerNodeAction ); @@ -694,7 +711,7 @@ public RegisterNodeManagerResponse registerNodeManager( response.setNMTokenMasterKey(createMasterKey()); return response; } - + @Override public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) throws YarnException, IOException { @@ -767,11 +784,11 @@ public void deleteBaseDir() throws IOException { lfs.delete(new Path(basedir.getPath()), true); } - @Test(timeout = 90000) - public void testRecentlyFinishedContainers() throws Exception { - NodeManager nm = new NodeManager(); - YarnConfiguration conf = new YarnConfiguration(); - conf.set( + @Test(timeout = 90000) + public void testRecentlyFinishedContainers() throws Exception { + NodeManager nm = new NodeManager(); + YarnConfiguration conf = new YarnConfiguration(); + conf.set( NodeStatusUpdaterImpl.YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, "10000"); nm.init(conf); @@ -780,27 +797,112 @@ public void testRecentlyFinishedContainers() throws Exception { ApplicationId appId = ApplicationId.newInstance(0, 0); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 0); - ContainerId cId = ContainerId.newInstance(appAttemptId, 0); - - + ContainerId cId = ContainerId.newInstance(appAttemptId, 0); + nm.getNMContext().getApplications().putIfAbsent(appId, + mock(Application.class)); + nm.getNMContext().getContainers().putIfAbsent(cId, mock(Container.class)); + nodeStatusUpdater.addCompletedContainer(cId); 
Assert.assertTrue(nodeStatusUpdater.isContainerRecentlyStopped(cId)); - + + nm.getNMContext().getContainers().remove(cId); long time1 = System.currentTimeMillis(); int waitInterval = 15; while (waitInterval-- > 0 && nodeStatusUpdater.isContainerRecentlyStopped(cId)) { - nodeStatusUpdater.removeVeryOldStoppedContainersFromCache(); + nodeStatusUpdater.removeVeryOldStoppedContainersFromCache(); Thread.sleep(1000); } - long time2 = System.currentTimeMillis(); + long time2 = System.currentTimeMillis(); // By this time the container will be removed from cache. need to verify. - Assert.assertFalse(nodeStatusUpdater.isContainerRecentlyStopped(cId)); - Assert.assertTrue((time2 - time1) >= 10000 && (time2 -time1) <= 250000); - } - + Assert.assertFalse(nodeStatusUpdater.isContainerRecentlyStopped(cId)); + Assert.assertTrue((time2 - time1) >= 10000 && (time2 - time1) <= 250000); + } + @Test(timeout = 90000) + public void testRemovePreviousCompletedContainersFromContext() throws Exception { + NodeManager nm = new NodeManager(); + YarnConfiguration conf = new YarnConfiguration(); + conf.set( + NodeStatusUpdaterImpl + .YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, + "10000"); + nm.init(conf); + NodeStatusUpdaterImpl nodeStatusUpdater = + (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater(); + ApplicationId appId = ApplicationId.newInstance(0, 0); + ApplicationAttemptId appAttemptId = + ApplicationAttemptId.newInstance(appId, 0); + ContainerId cId = ContainerId.newInstance(appAttemptId, 1); + Token containerToken = + BuilderUtils.newContainerToken(cId, "anyHost", 1234, "anyUser", + BuilderUtils.newResource(1024, 1), 0, 123, + "password".getBytes(), 0); + Container anyCompletedContainer = new ContainerImpl(conf, null, + null, null, null, null, + BuilderUtils.newContainerTokenIdentifier(containerToken)) { + + @Override + public ContainerState getCurrentState() { + return ContainerState.COMPLETE; + } + }; + + nm.getNMContext().getApplications().putIfAbsent(appId, + 
mock(Application.class)); + nm.getNMContext().getContainers().put(cId, anyCompletedContainer); + Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size()); + + List ackedContainers = new ArrayList(); + ackedContainers.add(cId); + + nodeStatusUpdater.removeCompletedContainersFromContext(ackedContainers); + Assert.assertTrue(nodeStatusUpdater.getContainerStatuses().isEmpty()); + } + + @Test + public void testCleanedupApplicationContainerCleanup() throws IOException { + NodeManager nm = new NodeManager(); + YarnConfiguration conf = new YarnConfiguration(); + conf.set(NodeStatusUpdaterImpl + .YARN_NODEMANAGER_DURATION_TO_TRACK_STOPPED_CONTAINERS, + "1000000"); + nm.init(conf); + + NodeStatusUpdaterImpl nodeStatusUpdater = + (NodeStatusUpdaterImpl) nm.getNodeStatusUpdater(); + ApplicationId appId = ApplicationId.newInstance(0, 0); + ApplicationAttemptId appAttemptId = + ApplicationAttemptId.newInstance(appId, 0); + + ContainerId cId = ContainerId.newInstance(appAttemptId, 1); + Token containerToken = + BuilderUtils.newContainerToken(cId, "anyHost", 1234, "anyUser", + BuilderUtils.newResource(1024, 1), 0, 123, + "password".getBytes(), 0); + Container anyCompletedContainer = new ContainerImpl(conf, null, + null, null, null, null, + BuilderUtils.newContainerTokenIdentifier(containerToken)) { + + @Override + public ContainerState getCurrentState() { + return ContainerState.COMPLETE; + } + }; + + nm.getNMContext().getApplications().putIfAbsent(appId, + mock(Application.class)); + nm.getNMContext().getContainers().put(cId, anyCompletedContainer); + + Assert.assertEquals(1, nodeStatusUpdater.getContainerStatuses().size()); + + nm.getNMContext().getApplications().remove(appId); + nodeStatusUpdater.removeCompletedContainersFromContext(new ArrayList + ()); + Assert.assertEquals(0, nodeStatusUpdater.getContainerStatuses().size()); + } + @Test public void testNMRegistration() throws InterruptedException { nm = new NodeManager() { @@ -860,7 +962,7 @@ public void run() 
{ nm.stop(); } - + @Test public void testStopReentrant() throws Exception { final AtomicInteger numCleanups = new AtomicInteger(0); @@ -875,7 +977,7 @@ protected NodeStatusUpdater createNodeStatusUpdater(Context context, myNodeStatusUpdater.resourceTracker = myResourceTracker2; return myNodeStatusUpdater; } - + @Override protected ContainerManagerImpl createContainerManager(Context context, ContainerExecutor exec, DeletionService del, @@ -897,7 +999,7 @@ public void cleanUpApplicationsOnNMShutDown() { YarnConfiguration conf = createNMConfig(); nm.init(conf); nm.start(); - + int waitCount = 0; while (heartBeatID < 1 && waitCount++ != 200) { Thread.sleep(500); @@ -906,7 +1008,7 @@ public void cleanUpApplicationsOnNMShutDown() { // Meanwhile call stop directly as the shutdown hook would nm.stop(); - + // NM takes a while to reach the STOPPED state. waitCount = 0; while (nm.getServiceState() != STATE.STOPPED && waitCount++ != 20) { @@ -1172,9 +1274,13 @@ protected NMContext createNMContext( nm.start(); int waitCount = 0; - while (heartBeatID <= 3 && waitCount++ != 20) { + while (heartBeatID <= 4 && waitCount++ != 20) { Thread.sleep(500); } + if (heartBeatID <= 4) { + Assert.fail("Failed to get all heartbeats in time, " + + "heartbeatID:" + heartBeatID); + } if(assertionFailedInThread.get()) { Assert.fail("ContainerStatus Backup failed"); } @@ -1182,7 +1288,7 @@ protected NMContext createNMContext( } @Test(timeout = 200000) - public void testNodeStatusUpdaterRetryAndNMShutdown() + public void testNodeStatusUpdaterRetryAndNMShutdown() throws Exception { final long connectionWaitSecs = 1000; final long connectionRetryIntervalMs = 1000; @@ -1190,7 +1296,7 @@ public void testNodeStatusUpdaterRetryAndNMShutdown() conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, connectionWaitSecs); conf.setLong(YarnConfiguration - .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, + .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, connectionRetryIntervalMs); 
conf.setLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, 5000); conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); @@ -1281,30 +1387,36 @@ public ConcurrentMap getContainers() { } else if (heartBeatID == 1) { ContainerStatus containerStatus2 = createContainerStatus(2, ContainerState.RUNNING); - Container container2 = getMockContainer(containerStatus2); - containers.put(containerStatus2.getContainerId(), container2); + putMockContainer(containerStatus2); ContainerStatus containerStatus3 = createContainerStatus(3, ContainerState.COMPLETE); - Container container3 = getMockContainer(containerStatus3); - containers.put(containerStatus3.getContainerId(), container3); + putMockContainer(containerStatus3); return containers; } else if (heartBeatID == 2) { ContainerStatus containerStatus4 = createContainerStatus(4, ContainerState.RUNNING); - Container container4 = getMockContainer(containerStatus4); - containers.put(containerStatus4.getContainerId(), container4); + putMockContainer(containerStatus4); ContainerStatus containerStatus5 = createContainerStatus(5, ContainerState.COMPLETE); - Container container5 = getMockContainer(containerStatus5); - containers.put(containerStatus5.getContainerId(), container5); + putMockContainer(containerStatus5); + return containers; + } else if (heartBeatID == 3 || heartBeatID == 4) { return containers; } else { containers.clear(); return containers; } } + + private void putMockContainer(ContainerStatus containerStatus) { + Container container = getMockContainer(containerStatus); + containers.put(containerStatus.getContainerId(), container); + applications.putIfAbsent(containerStatus.getContainerId() + .getApplicationAttemptId().getApplicationId(), + mock(Application.class)); + } } public static ContainerStatus createContainerStatus(int id, @@ -1345,7 +1457,7 @@ private void verifyNodeStartFailure(String errMessage) throws Exception { throw e; } } - + // the service should be stopped Assert.assertEquals("NM state is wrong!", 
STATE.STOPPED, nm .getServiceState()); @@ -1364,7 +1476,7 @@ private YarnConfiguration createNMConfig() { } conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB conf.set(YarnConfiguration.NM_ADDRESS, localhostAddress + ":12345"); - conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, localhostAddress + ":12346"); + conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, localhostAddress + ":12346"); conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); @@ -1372,7 +1484,7 @@ private YarnConfiguration createNMConfig() { conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); return conf; } - + private NodeManager getNodeManager(final NodeAction nodeHeartBeatAction) { return new NodeManager() { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 46ef432ae6d56..60f88f60b988f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -108,4 +108,6 @@ void setRMApplicationHistoryWriter( boolean isWorkPreservingRecoveryEnabled(); long getEpoch(); -} \ No newline at end of file + + boolean isSchedulerReadyForAllocatingContainers(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 8a9b51e56f99d..36eec045c52cf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -21,6 +21,9 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.yarn.LocalConfigurationProvider; @@ -44,6 +47,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.RMDelegationTokenSecretManager; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; import com.google.common.annotations.VisibleForTesting; @@ -85,6 +90,13 @@ public class RMContextImpl implements RMContext { private SystemMetricsPublisher systemMetricsPublisher; private ConfigurationProvider configurationProvider; private long epoch; + private Clock systemClock = new SystemClock(); + private long schedulerRecoveryStartTime = 0; + private long schedulerRecoveryWaitTime = 0; + private boolean printLog = true; + private boolean isSchedulerReady = false; + + private static final Log LOG = LogFactory.getLog(RMContextImpl.class); /** * Default constructor. 
To be used in conjunction with setter methods for @@ -379,7 +391,34 @@ public long getEpoch() { return this.epoch; } - void setEpoch(long epoch) { + void setEpoch(long epoch) { this.epoch = epoch; } -} \ No newline at end of file + + public void setSchedulerRecoveryStartAndWaitTime(long waitTime) { + this.schedulerRecoveryStartTime = systemClock.getTime(); + this.schedulerRecoveryWaitTime = waitTime; + } + + public boolean isSchedulerReadyForAllocatingContainers() { + if (isSchedulerReady) { + return isSchedulerReady; + } + isSchedulerReady = (systemClock.getTime() - schedulerRecoveryStartTime) + > schedulerRecoveryWaitTime; + if (!isSchedulerReady && printLog) { + LOG.info("Skip allocating containers. Scheduler is waiting for recovery."); + printLog = false; + } + if (isSchedulerReady) { + LOG.info("Scheduler recovery is done. Start allocating new containers."); + } + return isSchedulerReady; + } + + @Private + @VisibleForTesting + public void setSystemClock(Clock clock) { + this.systemClock = clock; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 0def6158ce638..79af7a649f3da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -1131,6 +1131,8 @@ public void recover(RMState state) throws Exception { // recover applications rmAppManager.recover(state); + + setSchedulerRecoveryStartAndWaitTime(state, conf); } public static void main(String argv[]) { @@ -1178,6 
+1180,16 @@ private void resetDispatcher() { rmContext.setDispatcher(rmDispatcher); } + private void setSchedulerRecoveryStartAndWaitTime(RMState state, + Configuration conf) { + if (!state.getApplicationState().isEmpty()) { + long waitTime = + conf.getLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, + YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS); + rmContext.setSchedulerRecoveryStartAndWaitTime(waitTime); + } + } + /** * Retrieve RM bind address from configuration * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 4798120c0daaf..4222888272118 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -198,7 +198,7 @@ protected void serviceStop() throws Exception { */ @SuppressWarnings("unchecked") @VisibleForTesting - void handleNMContainerStatus(NMContainerStatus containerStatus) { + void handleNMContainerStatus(NMContainerStatus containerStatus, NodeId nodeId) { ApplicationAttemptId appAttemptId = containerStatus.getContainerId().getApplicationAttemptId(); RMApp rmApp = @@ -229,7 +229,8 @@ void handleNMContainerStatus(NMContainerStatus containerStatus) { containerStatus.getContainerExitStatus()); // sending master container finished event. 
RMAppAttemptContainerFinishedEvent evt = - new RMAppAttemptContainerFinishedEvent(appAttemptId, status); + new RMAppAttemptContainerFinishedEvent(appAttemptId, status, + nodeId); rmContext.getDispatcher().getEventHandler().handle(evt); } } @@ -324,7 +325,7 @@ public RegisterNodeManagerResponse registerNodeManager( LOG.info("received container statuses on node manager register :" + request.getNMContainerStatuses()); for (NMContainerStatus status : request.getNMContainerStatuses()) { - handleNMContainerStatus(status); + handleNMContainerStatus(status, nodeId); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/RMApplicationHistoryWriter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/RMApplicationHistoryWriter.java index 58d2e3d1ea4d3..bd328abe7ccf2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/RMApplicationHistoryWriter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/RMApplicationHistoryWriter.java @@ -52,6 +52,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import com.google.common.annotations.VisibleForTesting; + /** *

* {@link ResourceManager} uses this class to write the information of @@ -71,8 +73,10 @@ public class RMApplicationHistoryWriter extends CompositeService { .getLog(RMApplicationHistoryWriter.class); private Dispatcher dispatcher; - private ApplicationHistoryWriter writer; - private boolean historyServiceEnabled; + @VisibleForTesting + ApplicationHistoryWriter writer; + @VisibleForTesting + boolean historyServiceEnabled; public RMApplicationHistoryWriter() { super(RMApplicationHistoryWriter.class.getName()); @@ -80,13 +84,18 @@ public RMApplicationHistoryWriter() { @Override protected synchronized void serviceInit(Configuration conf) throws Exception { - historyServiceEnabled = conf.getBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, YarnConfiguration.DEFAULT_APPLICATION_HISTORY_ENABLED); + if (conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE) == null || + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE).length() == 0 || + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE).equals( + NullApplicationHistoryStore.class.getName())) { + historyServiceEnabled = false; + } - // Only create the services when the history service is enabled, preventing - // wasting the system resources. + // Only create the services when the history service is enabled and not + // using the null store, preventing wasting the system resources. 
if (historyServiceEnabled) { writer = createApplicationHistoryStore(conf); addIfService(writer); @@ -112,25 +121,19 @@ protected Dispatcher createDispatcher(Configuration conf) { protected ApplicationHistoryStore createApplicationHistoryStore( Configuration conf) { - // If the history writer is not enabled, a dummy store will be used to - // write nothing - if (historyServiceEnabled) { - try { - Class storeClass = - conf.getClass(YarnConfiguration.APPLICATION_HISTORY_STORE, - FileSystemApplicationHistoryStore.class, + try { + Class storeClass = + conf.getClass(YarnConfiguration.APPLICATION_HISTORY_STORE, + NullApplicationHistoryStore.class, ApplicationHistoryStore.class); - return storeClass.newInstance(); - } catch (Exception e) { - String msg = - "Could not instantiate ApplicationHistoryWriter: " - + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE, - FileSystemApplicationHistoryStore.class.getName()); - LOG.error(msg, e); - throw new YarnRuntimeException(msg, e); - } - } else { - return new NullApplicationHistoryStore(); + return storeClass.newInstance(); + } catch (Exception e) { + String msg = + "Could not instantiate ApplicationHistoryWriter: " + + conf.get(YarnConfiguration.APPLICATION_HISTORY_STORE, + NullApplicationHistoryStore.class.getName()); + LOG.error(msg, e); + throw new YarnRuntimeException(msg, e); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java index ecf37b0405813..5da006c009556 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java @@ -160,7 +160,7 @@ public void appAttemptRegistered(RMAppAttempt appAttempt, @SuppressWarnings("unchecked") public void appAttemptFinished(RMAppAttempt appAttempt, - RMAppAttemptState state, long finishedTime) { + RMAppAttemptState appAttemtpState, RMApp app, long finishedTime) { if (publishSystemMetrics) { dispatcher.getEventHandler().handle( new AppAttemptFinishedEvent( @@ -168,8 +168,10 @@ public void appAttemptFinished(RMAppAttempt appAttempt, appAttempt.getTrackingUrl(), appAttempt.getOriginalTrackingUrl(), appAttempt.getDiagnostics(), - appAttempt.getFinalApplicationStatus(), - RMServerUtils.createApplicationAttemptState(state), + // app will get the final status from app attempt, or create one + // based on app state if it doesn't exist + app.getFinalApplicationStatus(), + RMServerUtils.createApplicationAttemptState(appAttemtpState), finishedTime)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 815b86aa879b2..0b8f321222892 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Collection; import java.util.Collections; import 
java.util.EnumSet; @@ -76,6 +78,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils; import org.apache.hadoop.yarn.state.InvalidStateTransitonException; import org.apache.hadoop.yarn.state.MultipleArcTransition; import org.apache.hadoop.yarn.state.SingleArcTransition; @@ -84,6 +87,7 @@ import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.SystemClock; import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.hadoop.yarn.webapp.util.WebAppUtils; import com.google.common.annotations.VisibleForTesting; @@ -542,6 +546,7 @@ public ApplicationReport createAndGetApplicationReport(String clientUserName, float progress = 0.0f; org.apache.hadoop.yarn.api.records.Token amrmToken = null; if (allowAccess) { + trackingUrl = getDefaultProxyTrackingUrl(); if (this.currentAttempt != null) { currentApplicationAttemptId = this.currentAttempt.getAppAttemptId(); trackingUrl = this.currentAttempt.getTrackingUrl(); @@ -602,6 +607,20 @@ public ApplicationReport createAndGetApplicationReport(String clientUserName, } } + private String getDefaultProxyTrackingUrl() { + try { + final String scheme = WebAppUtils.getHttpSchemePrefix(conf); + String proxy = WebAppUtils.getProxyHostAndPort(conf); + URI proxyUri = ProxyUriUtils.getUriFromAMUrl(scheme, proxy); + URI result = ProxyUriUtils.getProxyUri(null, proxyUri, applicationId); + return result.toASCIIString(); + } catch (URISyntaxException e) { + LOG.warn("Could not generate default proxy tracking URL for " + + applicationId); + return UNAVAILABLE; + } + } + @Override public long getFinishTime() { this.readLock.lock(); @@ -1162,7 +1181,7 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) { int numberOfFailure = 
app.getNumFailedAppAttempts(); if (!app.submissionContext.getUnmanagedAM() && numberOfFailure < app.maxAppAttempts) { - boolean transferStateFromPreviousAttempt = false; + boolean transferStateFromPreviousAttempt; RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event; transferStateFromPreviousAttempt = failedEvent.getTransferStateFromPreviousAttempt(); @@ -1172,11 +1191,11 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) { // Transfer the state from the previous attempt to the current attempt. // Note that the previous failed attempt may still be collecting the // container events from the scheduler and update its data structures - // before the new attempt is created. - if (transferStateFromPreviousAttempt) { - ((RMAppAttemptImpl) app.currentAttempt) - .transferStateFromPreviousAttempt(oldAttempt); - } + // before the new attempt is created. We always transferState for + // finished containers so that they can be acked to NM, + // but when pulling finished container we will check this flag again. 
+ ((RMAppAttemptImpl) app.currentAttempt) + .transferStateFromPreviousAttempt(oldAttempt); return initialState; } else { if (numberOfFailure >= app.maxAppAttempts) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java index 943a5e53d4536..cf8c2bbc86576 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; import java.util.List; +import java.util.concurrent.ConcurrentMap; import javax.crypto.SecretKey; @@ -31,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; @@ -120,13 +122,28 @@ public interface RMAppAttempt extends EventHandler { List pullJustFinishedContainers(); /** - * Return the list of last set of finished containers. This does not reset the - * finished containers. - * @return the list of just finished contianers, this does not reset the + * Returns a reference to the map of last set of finished containers to the + * corresponding node. This does not reset the finished containers. 
+ * @return the list of just finished containers, this does not reset the * finished containers. */ + ConcurrentMap> + getJustFinishedContainersReference(); + + /** + * Return the list of last set of finished containers. This does not reset + * the finished containers. + * @return the list of just finished containers + */ List getJustFinishedContainers(); + /** + * The map of conatiners per Node that are already sent to the AM. + * @return map of per node list of finished container status sent to AM + */ + ConcurrentMap> + getFinishedContainersSentToAMReference(); + /** * The container on which the Application Master is running. * @return the {@link Container} on which the application master is running. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 863130fd35f15..d75a8716957ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -24,9 +24,12 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.EnumSet; import java.util.List; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import 
java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; @@ -52,6 +55,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationAttemptState; @@ -83,6 +87,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -129,9 +134,16 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private final ApplicationSubmissionContext submissionContext; private Token amrmToken = null; private SecretKey clientTokenMasterKey = null; - - private List justFinishedContainers = - new ArrayList(); + + private ConcurrentMap> + justFinishedContainers = + new ConcurrentHashMap>(); + // Tracks the previous finished containers that are waiting to be + // verified as received by the AM. If the AM sends the next allocate + // request it implicitly acks this list. 
+ private ConcurrentMap> + finishedContainersSentToAM = + new ConcurrentHashMap>(); private Container masterContainer; private float progress = 0; @@ -627,9 +639,27 @@ public float getProgress() { } } + @VisibleForTesting @Override public List getJustFinishedContainers() { this.readLock.lock(); + try { + List returnList = new ArrayList(); + for (Collection containerStatusList : + justFinishedContainers.values()) { + returnList.addAll(containerStatusList); + } + return returnList; + } finally { + this.readLock.unlock(); + } + } + + @Override + public ConcurrentMap> + getJustFinishedContainersReference + () { + this.readLock.lock(); try { return this.justFinishedContainers; } finally { @@ -637,15 +667,68 @@ public List getJustFinishedContainers() { } } + @Override + public ConcurrentMap> + getFinishedContainersSentToAMReference() { + this.readLock.lock(); + try { + return this.finishedContainersSentToAM; + } finally { + this.readLock.unlock(); + } + } + @Override public List pullJustFinishedContainers() { this.writeLock.lock(); try { - List returnList = new ArrayList( - this.justFinishedContainers.size()); - returnList.addAll(this.justFinishedContainers); - this.justFinishedContainers.clear(); + List returnList = new ArrayList(); + + // A new allocate means the AM received the previously sent + // finishedContainers. 
We can ack this to NM now + for (NodeId nodeId:finishedContainersSentToAM.keySet()) { + + // Clear and get current values + List currentSentContainers = + finishedContainersSentToAM + .put(nodeId, new ArrayList()); + List containerIdList = new ArrayList + (currentSentContainers.size()); + for (ContainerStatus containerStatus:currentSentContainers) { + containerIdList.add(containerStatus.getContainerId()); + } + eventHandler.handle(new RMNodeFinishedContainersPulledByAMEvent( + nodeId, containerIdList)); + } + + // Mark every containerStatus as being sent to AM though we may return + // only the ones that belong to the current attempt + boolean keepContainersAcressAttempts = this.submissionContext + .getKeepContainersAcrossApplicationAttempts(); + for (NodeId nodeId:justFinishedContainers.keySet()) { + + // Clear and get current values + List finishedContainers = justFinishedContainers.put + (nodeId, new ArrayList()); + + if (keepContainersAcressAttempts) { + returnList.addAll(finishedContainers); + } else { + // Filter out containers from previous attempt + for (ContainerStatus containerStatus: finishedContainers) { + if (containerStatus.getContainerId().getApplicationAttemptId() + .equals(this.getAppAttemptId())) { + returnList.add(containerStatus); + } + } + } + + finishedContainersSentToAM.putIfAbsent(nodeId, new ArrayList + ()); + finishedContainersSentToAM.get(nodeId).addAll(finishedContainers); + } + return returnList; } finally { this.writeLock.unlock(); @@ -732,7 +815,7 @@ public void recover(RMState state) throws Exception { } setMasterContainer(attemptState.getMasterContainer()); recoverAppAttemptCredentials(attemptState.getAppAttemptCredentials(), - attemptState.getState()); + attemptState.getState()); this.recoveredFinalState = attemptState.getState(); this.originalTrackingUrl = attemptState.getFinalTrackingUrl(); this.proxiedTrackingUrl = generateProxyUriWithScheme(originalTrackingUrl); @@ -744,7 +827,9 @@ public void recover(RMState state) throws 
Exception { } public void transferStateFromPreviousAttempt(RMAppAttempt attempt) { - this.justFinishedContainers = attempt.getJustFinishedContainers(); + this.justFinishedContainers = attempt.getJustFinishedContainersReference(); + this.finishedContainersSentToAM = + attempt.getFinishedContainersSentToAMReference(); } private void recoverAppAttemptCredentials(Credentials appAttemptTokens, @@ -1159,8 +1244,10 @@ public void transition(RMAppAttemptImpl appAttempt, appAttempt.rmContext.getRMApplicationHistoryWriter() .applicationAttemptFinished(appAttempt, finalAttemptState); appAttempt.rmContext.getSystemMetricsPublisher() - .appAttemptFinished( - appAttempt, finalAttemptState, System.currentTimeMillis()); + .appAttemptFinished(appAttempt, finalAttemptState, + appAttempt.rmContext.getRMApps().get( + appAttempt.applicationAttemptId.getApplicationId()), + System.currentTimeMillis()); } } @@ -1505,6 +1592,9 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, ContainerStatus containerStatus = containerFinishedEvent.getContainerStatus(); + // Add all finished containers so that they can be acked to NM + addJustFinishedContainer(appAttempt, containerFinishedEvent); + // Is this container the AmContainer? 
If the finished container is same as // the AMContainer, AppAttempt fails if (appAttempt.masterContainer != null @@ -1517,12 +1607,18 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, return RMAppAttemptState.FINAL_SAVING; } - // Normal container.Put it in completed containers list - appAttempt.justFinishedContainers.add(containerStatus); return this.currentState; } } + private static void addJustFinishedContainer(RMAppAttemptImpl appAttempt, + RMAppAttemptContainerFinishedEvent containerFinishedEvent) { + appAttempt.justFinishedContainers.putIfAbsent(containerFinishedEvent + .getNodeId(), new ArrayList()); + appAttempt.justFinishedContainers.get(containerFinishedEvent + .getNodeId()).add(containerFinishedEvent.getContainerStatus()); + } + private static final class ContainerFinishedAtFinalStateTransition extends BaseTransition { @Override @@ -1531,10 +1627,8 @@ private static final class ContainerFinishedAtFinalStateTransition RMAppAttemptContainerFinishedEvent containerFinishedEvent = (RMAppAttemptContainerFinishedEvent) event; - ContainerStatus containerStatus = - containerFinishedEvent.getContainerStatus(); // Normal container. Add it in completed containers list - appAttempt.justFinishedContainers.add(containerStatus); + addJustFinishedContainer(appAttempt, containerFinishedEvent); } } @@ -1567,6 +1661,9 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, ContainerStatus containerStatus = containerFinishedEvent.getContainerStatus(); + // Add all finished containers so that they can be acked to NM. + addJustFinishedContainer(appAttempt, containerFinishedEvent); + // Is this container the ApplicationMaster container? if (appAttempt.masterContainer.getId().equals( containerStatus.getContainerId())) { @@ -1574,8 +1671,7 @@ public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, appAttempt, containerFinishedEvent); return RMAppAttemptState.FINISHED; } - // Normal container. 
- appAttempt.justFinishedContainers.add(containerStatus); + return RMAppAttemptState.FINISHING; } } @@ -1590,6 +1686,9 @@ private static class ContainerFinishedAtFinalSavingTransition extends ContainerStatus containerStatus = containerFinishedEvent.getContainerStatus(); + // Add all finished containers so that they can be acked to NM. + addJustFinishedContainer(appAttempt, containerFinishedEvent); + // If this is the AM container, it means the AM container is finished, // but we are not yet acknowledged that the final state has been saved. // Thus, we still return FINAL_SAVING state here. @@ -1609,8 +1708,6 @@ private static class ContainerFinishedAtFinalSavingTransition extends appAttempt.eventCausingFinalSaving), RMAppAttemptState.FINISHED); return; } - // Normal container. - appAttempt.justFinishedContainers.add(containerStatus); } } @@ -1627,7 +1724,7 @@ public AMFinishedAfterFinalSavingTransition( transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { appAttempt.updateInfoOnAMUnregister(amUnregisteredEvent); new FinalTransition(RMAppAttemptState.FINISHED).transition(appAttempt, - event); + event); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptContainerFinishedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptContainerFinishedEvent.java index 3660597d3fc43..39c6f29063571 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptContainerFinishedEvent.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptContainerFinishedEvent.java @@ -20,21 +20,27 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; public class RMAppAttemptContainerFinishedEvent extends RMAppAttemptEvent { private final ContainerStatus containerStatus; + private final NodeId nodeId; public RMAppAttemptContainerFinishedEvent(ApplicationAttemptId appAttemptId, - ContainerStatus containerStatus) { + ContainerStatus containerStatus, NodeId nodeId) { super(appAttemptId, RMAppAttemptEventType.CONTAINER_FINISHED); this.containerStatus = containerStatus; + this.nodeId = nodeId; } public ContainerStatus getContainerStatus() { return this.containerStatus; } + public NodeId getNodeId() { + return this.nodeId; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 885e864124bf4..479734a653b3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -78,13 +78,13 @@ RMContainerEventType.RESERVED, 
new ContainerReservedTransition()) RMContainerEventType.RECOVER, new ContainerRecoveredTransition()) // Transitions from RESERVED state - .addTransition(RMContainerState.RESERVED, RMContainerState.RESERVED, + .addTransition(RMContainerState.RESERVED, RMContainerState.RESERVED, RMContainerEventType.RESERVED, new ContainerReservedTransition()) - .addTransition(RMContainerState.RESERVED, RMContainerState.ALLOCATED, + .addTransition(RMContainerState.RESERVED, RMContainerState.ALLOCATED, RMContainerEventType.START, new ContainerStartedTransition()) - .addTransition(RMContainerState.RESERVED, RMContainerState.KILLED, + .addTransition(RMContainerState.RESERVED, RMContainerState.KILLED, RMContainerEventType.KILL) // nothing to do - .addTransition(RMContainerState.RESERVED, RMContainerState.RELEASED, + .addTransition(RMContainerState.RESERVED, RMContainerState.RELEASED, RMContainerEventType.RELEASED) // nothing to do @@ -100,7 +100,7 @@ RMContainerEventType.KILL, new FinishedTransition()) .addTransition(RMContainerState.ACQUIRED, RMContainerState.RUNNING, RMContainerEventType.LAUNCHED, new LaunchedTransition()) .addTransition(RMContainerState.ACQUIRED, RMContainerState.COMPLETED, - RMContainerEventType.FINISHED, new ContainerFinishedAtAcquiredState()) + RMContainerEventType.FINISHED, new ContainerFinishedAtAcquiredState()) .addTransition(RMContainerState.ACQUIRED, RMContainerState.RELEASED, RMContainerEventType.RELEASED, new KillTransition()) .addTransition(RMContainerState.ACQUIRED, RMContainerState.EXPIRED, @@ -495,7 +495,8 @@ public void transition(RMContainerImpl container, RMContainerEvent event) { updateAttemptMetrics(container); container.eventHandler.handle(new RMAppAttemptContainerFinishedEvent( - container.appAttemptId, finishedEvent.getRemoteContainerStatus())); + container.appAttemptId, finishedEvent.getRemoteContainerStatus(), + container.getAllocatedNode())); container.rmContext.getRMApplicationHistoryWriter().containerFinished( container); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java index c0096b9b90d1b..b4d0b8bce24a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java @@ -40,6 +40,9 @@ public enum RMNodeEventType { CONTAINER_ALLOCATED, CLEANUP_CONTAINER, + // Source: RMAppAttempt + FINISHED_CONTAINERS_PULLED_BY_AM, + // Source: NMLivelinessMonitor EXPIRE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeFinishedContainersPulledByAMEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeFinishedContainersPulledByAMEvent.java new file mode 100644 index 0000000000000..a4fb70764c216 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeFinishedContainersPulledByAMEvent.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmnode; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; + +import java.util.List; + +// Happens after an implicit ack from AM that the container completion has +// been notified successfully to the AM +public class RMNodeFinishedContainersPulledByAMEvent extends RMNodeEvent { + + private List containers; + + public RMNodeFinishedContainersPulledByAMEvent(NodeId nodeId, + List containers) { + super(nodeId, RMNodeEventType.FINISHED_CONTAINERS_PULLED_BY_AM); + this.containers = containers; + } + + public List getContainers() { + return this.containers; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 3ce641662cc28..f0ae826ee5ea7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -112,6 +112,10 @@ public class RMNodeImpl implements RMNode, EventHandler { private final Set containersToClean = new TreeSet( new ContainerIdComparator()); + /* set of containers that were notified to AM about their completion */ + private final Set finishedContainersPulledByAM = + new HashSet(); + /* the list of applications that have finished and need to be purged */ private final List finishedApplications = new ArrayList(); @@ -135,7 +139,7 @@ RMNodeEventType.STARTED, new AddNodeTransition()) new UpdateNodeResourceWhenUnusableTransition()) //Transitions from RUNNING state - .addTransition(NodeState.RUNNING, + .addTransition(NodeState.RUNNING, EnumSet.of(NodeState.RUNNING, NodeState.UNHEALTHY), RMNodeEventType.STATUS_UPDATE, new StatusUpdateWhenHealthyTransition()) .addTransition(NodeState.RUNNING, NodeState.DECOMMISSIONED, @@ -151,6 +155,9 @@ RMNodeEventType.STATUS_UPDATE, new StatusUpdateWhenHealthyTransition()) RMNodeEventType.CLEANUP_APP, new CleanUpAppTransition()) .addTransition(NodeState.RUNNING, NodeState.RUNNING, RMNodeEventType.CLEANUP_CONTAINER, new CleanUpContainerTransition()) + .addTransition(NodeState.RUNNING, NodeState.RUNNING, + RMNodeEventType.FINISHED_CONTAINERS_PULLED_BY_AM, + new FinishedContainersPulledByAMTransition()) .addTransition(NodeState.RUNNING, NodeState.RUNNING, RMNodeEventType.RECONNECTED, new ReconnectNodeTransition()) .addTransition(NodeState.RUNNING, NodeState.RUNNING, @@ -158,23 +165,30 @@ RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenRunningTransition()) //Transitions from REBOOTED state .addTransition(NodeState.REBOOTED, NodeState.REBOOTED, - RMNodeEventType.RESOURCE_UPDATE, + RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenUnusableTransition()) //Transitions from DECOMMISSIONED state .addTransition(NodeState.DECOMMISSIONED, 
NodeState.DECOMMISSIONED, - RMNodeEventType.RESOURCE_UPDATE, + RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenUnusableTransition()) - + .addTransition(NodeState.DECOMMISSIONED, NodeState.DECOMMISSIONED, + RMNodeEventType.FINISHED_CONTAINERS_PULLED_BY_AM, + new FinishedContainersPulledByAMTransition()) + //Transitions from LOST state .addTransition(NodeState.LOST, NodeState.LOST, - RMNodeEventType.RESOURCE_UPDATE, + RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenUnusableTransition()) + .addTransition(NodeState.LOST, NodeState.LOST, + RMNodeEventType.FINISHED_CONTAINERS_PULLED_BY_AM, + new FinishedContainersPulledByAMTransition()) //Transitions from UNHEALTHY state - .addTransition(NodeState.UNHEALTHY, + .addTransition(NodeState.UNHEALTHY, EnumSet.of(NodeState.UNHEALTHY, NodeState.RUNNING), - RMNodeEventType.STATUS_UPDATE, new StatusUpdateWhenUnHealthyTransition()) + RMNodeEventType.STATUS_UPDATE, + new StatusUpdateWhenUnHealthyTransition()) .addTransition(NodeState.UNHEALTHY, NodeState.DECOMMISSIONED, RMNodeEventType.DECOMMISSION, new DeactivateNodeTransition(NodeState.DECOMMISSIONED)) @@ -192,7 +206,10 @@ RMNodeEventType.CLEANUP_APP, new CleanUpAppTransition()) RMNodeEventType.CLEANUP_CONTAINER, new CleanUpContainerTransition()) .addTransition(NodeState.UNHEALTHY, NodeState.UNHEALTHY, RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenUnusableTransition()) - + .addTransition(NodeState.UNHEALTHY, NodeState.UNHEALTHY, + RMNodeEventType.FINISHED_CONTAINERS_PULLED_BY_AM, + new FinishedContainersPulledByAMTransition()) + // create the topology tables .installTopology(); @@ -365,8 +382,11 @@ public void updateNodeHeartbeatResponseForCleanup(NodeHeartbeatResponse response response.addAllContainersToCleanup( new ArrayList(this.containersToClean)); response.addAllApplicationsToCleanup(this.finishedApplications); + response.addFinishedContainersPulledByAM( + new ArrayList(this.finishedContainersPulledByAM)); this.containersToClean.clear(); 
this.finishedApplications.clear(); + this.finishedContainersPulledByAM.clear(); } finally { this.writeLock.unlock(); } @@ -544,12 +564,47 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeReconnectEvent reconnectEvent = (RMNodeReconnectEvent) event; RMNode newNode = reconnectEvent.getReconnectedNode(); rmNode.nodeManagerVersion = newNode.getNodeManagerVersion(); - rmNode.httpPort = newNode.getHttpPort(); - rmNode.httpAddress = newNode.getHttpAddress(); - rmNode.totalCapability = newNode.getTotalCapability(); + List runningApps = reconnectEvent.getRunningApplications(); + boolean noRunningApps = + (runningApps == null) || (runningApps.size() == 0); + + // No application running on the node, so send node-removal event with + // cleaning up old container info. + if (noRunningApps) { + rmNode.nodeUpdateQueue.clear(); + rmNode.context.getDispatcher().getEventHandler().handle( + new NodeRemovedSchedulerEvent(rmNode)); + + if (rmNode.getHttpPort() == newNode.getHttpPort()) { + // Reset heartbeat ID since node just restarted. + rmNode.getLastNodeHeartBeatResponse().setResponseId(0); + if (rmNode.getState() != NodeState.UNHEALTHY) { + // Only add new node if old state is not UNHEALTHY + rmNode.context.getDispatcher().getEventHandler().handle( + new NodeAddedSchedulerEvent(newNode)); + } + } else { + // Reconnected node differs, so replace old node and start new node + switch (rmNode.getState()) { + case RUNNING: + ClusterMetrics.getMetrics().decrNumActiveNodes(); + break; + case UNHEALTHY: + ClusterMetrics.getMetrics().decrNumUnhealthyNMs(); + break; + } + rmNode.context.getRMNodes().put(newNode.getNodeID(), newNode); + rmNode.context.getDispatcher().getEventHandler().handle( + new RMNodeStartedEvent(newNode.getNodeID(), null, null)); + } + } else { + rmNode.httpPort = newNode.getHttpPort(); + rmNode.httpAddress = newNode.getHttpAddress(); + rmNode.totalCapability = newNode.getTotalCapability(); - // Reset heartbeat ID since node just restarted. 
- rmNode.getLastNodeHeartBeatResponse().setResponseId(0); + // Reset heartbeat ID since node just restarted. + rmNode.getLastNodeHeartBeatResponse().setResponseId(0); + } if (null != reconnectEvent.getRunningApplications()) { for (ApplicationId appId : reconnectEvent.getRunningApplications()) { @@ -564,7 +619,7 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { // Update scheduler node's capacity for reconnect node. rmNode.context.getDispatcher().getEventHandler().handle( new NodeResourceUpdateSchedulerEvent(rmNode, - ResourceOption.newInstance(rmNode.totalCapability, -1))); + ResourceOption.newInstance(newNode.getTotalCapability(), -1))); } } @@ -617,6 +672,16 @@ public void transition(RMNodeImpl rmNode, RMNodeEvent event) { } } + public static class FinishedContainersPulledByAMTransition implements + SingleArcTransition { + + @Override + public void transition(RMNodeImpl rmNode, RMNodeEvent event) { + rmNode.finishedContainersPulledByAM.addAll((( + RMNodeFinishedContainersPulledByAMEvent) event).getContainers()); + } + } + public static class DeactivateNodeTransition implements SingleArcTransition { @@ -691,7 +756,7 @@ public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { new ArrayList(); for (ContainerStatus remoteContainer : statusEvent.getContainers()) { ContainerId containerId = remoteContainer.getContainerId(); - + // Don't bother with containers already scheduled for cleanup, or for // applications already killed. 
The scheduler doens't need to know any // more about this container diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 6b810d7d8f9ff..bdfc819246980 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -902,6 +902,10 @@ private synchronized void updateNodeAndQueueResource(RMNode nm, } private synchronized void allocateContainersToNode(FiCaSchedulerNode node) { + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } // Assign new containers... // 1. 
Check for reserved applications diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java index de5a999c2dd85..70a6496a4a9e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationConfiguration.java @@ -130,7 +130,7 @@ public AllocationConfiguration(Configuration conf) { queueMaxAMShares = new HashMap(); userMaxAppsDefault = Integer.MAX_VALUE; queueMaxAppsDefault = Integer.MAX_VALUE; - queueMaxAMShareDefault = -1.0f; + queueMaxAMShareDefault = 0.5f; queueAcls = new HashMap>(); minSharePreemptionTimeouts = new HashMap(); fairSharePreemptionTimeouts = new HashMap(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java index c2dfc84a536b2..2022510c67372 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/AllocationFileLoaderService.java @@ -224,7 +224,7 @@ public synchronized void reloadAllocations() throws IOException, new HashMap>(); int userMaxAppsDefault = Integer.MAX_VALUE; int queueMaxAppsDefault = Integer.MAX_VALUE; - float queueMaxAMShareDefault = -1.0f; + float queueMaxAMShareDefault = 0.5f; long defaultFairSharePreemptionTimeout = Long.MAX_VALUE; long defaultMinSharePreemptionTimeout = Long.MAX_VALUE; float defaultFairSharePreemptionThreshold = 0.5f; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index 825c3985c7721..b9966e7f5511a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -171,6 +171,33 @@ private synchronized void unreserveInternal( + priority + "; currentReservation " + currentReservation); } + @Override + public synchronized Resource getHeadroom() { + final FSQueue queue = (FSQueue) this.queue; + SchedulingPolicy policy = queue.getPolicy(); + + Resource queueFairShare = queue.getFairShare(); + Resource queueUsage = queue.getResourceUsage(); + Resource clusterResource = this.scheduler.getClusterResource(); + Resource clusterUsage = this.scheduler.getRootQueueMetrics() + .getAllocatedResources(); + Resource clusterAvailableResource = 
Resources.subtract(clusterResource, + clusterUsage); + Resource headroom = policy.getHeadroom(queueFairShare, + queueUsage, clusterAvailableResource); + if (LOG.isDebugEnabled()) { + LOG.debug("Headroom calculation for " + this.getName() + ":" + + "Min(" + + "(queueFairShare=" + queueFairShare + + " - queueUsage=" + queueUsage + ")," + + " clusterAvailableResource=" + clusterAvailableResource + + "(clusterResource=" + clusterResource + + " - clusterUsage=" + clusterUsage + ")" + + "Headroom=" + headroom); + } + return headroom; + } + public synchronized float getLocalityWaitFactor( Priority priority, int clusterNodes) { // Estimate: Required unique resources (i.e. hosts + racks) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 9c40d48f06d98..296d8844373d5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1015,6 +1015,11 @@ public int compare(NodeId n1, NodeId n2) { } private synchronized void attemptScheduling(FSSchedulerNode node) { + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } + // Assign new containers... // 1. Check for reserved applications // 2. 
Schedule if there are no reservations diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java index 473c369b0921f..a96952daaa841 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java @@ -112,7 +112,7 @@ public class FairSchedulerConfiguration extends Configuration { protected static final int DEFAULT_WAIT_TIME_BEFORE_KILL = 15000; /** Whether to assign multiple containers in one check-in. */ - protected static final String ASSIGN_MULTIPLE = CONF_PREFIX + "assignmultiple"; + public static final String ASSIGN_MULTIPLE = CONF_PREFIX + "assignmultiple"; protected static final boolean DEFAULT_ASSIGN_MULTIPLE = false; /** Whether to give more weight to apps requiring many resources. 
*/ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java index ca006c580edd9..4f3123dffdd67 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java @@ -175,4 +175,19 @@ public abstract boolean checkIfUsageOverFairShare( */ public abstract boolean checkIfAMResourceUsageOverLimit( Resource usage, Resource maxAMResource); + + /** + * Get headroom by calculating the min of clusterAvailable and + * (queueFairShare - queueUsage) resources that are + * applicable to this policy. For eg if only memory then leave other + * resources such as CPU to same as clusterAvailable. 
+ * + * @param queueFairShare fairshare in the queue + * @param queueUsage resources used in the queue + * @param clusterAvailable available resource in cluster + * @return calculated headroom + */ + public abstract Resource getHeadroom(Resource queueFairShare, + Resource queueUsage, Resource clusterAvailable); + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java index 42044bcaac124..3f6cbd19adb31 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java @@ -77,7 +77,7 @@ public void computeSteadyShares(Collection queues, ComputeFairShares.computeSteadyShares(queues, totalResources, type); } } - + @Override public boolean checkIfUsageOverFairShare(Resource usage, Resource fairShare) { return !Resources.fitsIn(usage, fairShare); @@ -88,6 +88,21 @@ public boolean checkIfAMResourceUsageOverLimit(Resource usage, Resource maxAMRes return !Resources.fitsIn(usage, maxAMResource); } + @Override + public Resource getHeadroom(Resource queueFairShare, Resource queueUsage, + Resource clusterAvailable) { + int queueAvailableMemory = + Math.max(queueFairShare.getMemory() - queueUsage.getMemory(), 0); + int queueAvailableCPU = + Math.max(queueFairShare.getVirtualCores() - queueUsage + .getVirtualCores(), 0); + Resource headroom = 
Resources.createResource( + Math.min(clusterAvailable.getMemory(), queueAvailableMemory), + Math.min(clusterAvailable.getVirtualCores(), + queueAvailableCPU)); + return headroom; + } + @Override public void initialize(Resource clusterCapacity) { comparator.setClusterCapacity(clusterCapacity); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java index 66bb88bf16c81..97669cb4e2744 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java @@ -114,6 +114,17 @@ public Comparator getComparator() { return comparator; } + @Override + public Resource getHeadroom(Resource queueFairShare, + Resource queueUsage, Resource clusterAvailable) { + int queueAvailableMemory = Math.max( + queueFairShare.getMemory() - queueUsage.getMemory(), 0); + Resource headroom = Resources.createResource( + Math.min(clusterAvailable.getMemory(), queueAvailableMemory), + clusterAvailable.getVirtualCores()); + return headroom; + } + @Override public void computeShares(Collection schedulables, Resource totalResources) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java index 591ee4936b957..a2e17ecb0a58c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java @@ -107,6 +107,18 @@ public boolean checkIfAMResourceUsageOverLimit(Resource usage, Resource maxAMRes return usage.getMemory() > maxAMResource.getMemory(); } + @Override + public Resource getHeadroom(Resource queueFairShare, + Resource queueUsage, Resource clusterAvailable) { + int queueAvailableMemory = Math.max( + queueFairShare.getMemory() - queueUsage.getMemory(), 0); + Resource headroom = Resources.createResource( + Math.min(clusterAvailable.getMemory(), queueAvailableMemory), + clusterAvailable.getVirtualCores()); + return headroom; + } + + @Override public byte getApplicableDepth() { return SchedulingPolicy.DEPTH_LEAF; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index d72e7966064cf..ea21c2b3018a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -702,6 +702,12 @@ private synchronized void nodeUpdate(RMNode rmNode) { completedContainer, RMContainerEventType.FINISHED); } + + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } + if (Resources.greaterThanOrEqual(resourceCalculator, clusterResource, node.getAvailableResource(),minimumAllocation)) { LOG.debug("Node heartbeat " + rmNode.getNodeID() + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java index bcf7781fc47fc..8c54f4e62ee99 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerPage.java @@ -220,9 +220,9 @@ public void render(Block html) { " }", " });", " $('#cs').bind('select_node.jstree', function(e, data) {", - " var q = $('.q', data.rslt.obj).first().text();", - " if (q == 'root') q = '';", - " else q = '^' + q.substr(q.lastIndexOf('.') + 1) + '$';", + " var queues = $('.q', data.rslt.obj);", + " var q = '^' + queues.first().text();", + " q += queues.length == 1 ? 
'$' : '\\\\.';", " $('#apps').dataTable().fnFilter(q, 4, true);", " });", " $('#cs').show();", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 48276205bf9a5..877a12215ed47 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -26,6 +26,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -490,7 +491,7 @@ public void testHandleContainerStatusInvalidCompletions() throws Exception { ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); - rm.getResourceTrackerService().handleNMContainerStatus(report); + rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, never()).handle((Event) any()); // Case 1.2: Master container is null @@ -501,7 +502,7 @@ public void testHandleContainerStatusInvalidCompletions() throws Exception { ContainerId.newInstance(currentAttempt.getAppAttemptId(), 0), ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); - rm.getResourceTrackerService().handleNMContainerStatus(report); + rm.getResourceTrackerService().handleNMContainerStatus(report, null); verify(handler, 
never()).handle((Event)any()); // Case 2: Managed AM @@ -514,7 +515,7 @@ public void testHandleContainerStatusInvalidCompletions() throws Exception { ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { - rm.getResourceTrackerService().handleNMContainerStatus(report); + rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } @@ -529,7 +530,7 @@ public void testHandleContainerStatusInvalidCompletions() throws Exception { ContainerState.COMPLETE, Resource.newInstance(1024, 1), "Dummy Completed", 0, Priority.newInstance(10), 1234); try { - rm.getResourceTrackerService().handleNMContainerStatus(report); + rm.getResourceTrackerService().handleNMContainerStatus(report, null); } catch (Exception e) { // expected - ignore } @@ -599,6 +600,16 @@ protected Dispatcher createDispatcher() { dispatcher.await(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); Assert.assertEquals(5120 + 10240, metrics.getAvailableMB()); + + // reconnect of node with changed capability and running applications + List runningApps = new ArrayList(); + runningApps.add(ApplicationId.newInstance(1, 0)); + nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); + dispatcher.await(); + response = nm1.nodeHeartbeat(true); + dispatcher.await(); + Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); + Assert.assertEquals(5120 + 15360, metrics.getAvailableMB()); } private void writeToHostsFile(String... 
hosts) throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index 02983c2568246..5f00f3180b553 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -37,10 +37,12 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; @@ -62,6 +64,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; +import org.apache.hadoop.yarn.util.ControlledClock; +import org.apache.hadoop.yarn.util.SystemClock; import 
org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -98,6 +102,7 @@ public void setup() throws UnknownHostException { conf.setClass(YarnConfiguration.RM_SCHEDULER, schedulerClass, ResourceScheduler.class); conf.setBoolean(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true); + conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0); DefaultMetricsSystem.setMiniClusterMode(true); } @@ -479,6 +484,7 @@ private void checkParentQueue(ParentQueue parentQueue, int numContainers, @Test(timeout = 20000) public void testAMfailedBetweenRMRestart() throws Exception { MemoryRMStateStore memStore = new MemoryRMStateStore(); + conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0); memStore.init(conf); rm1 = new MockRM(conf, memStore); rm1.start(); @@ -762,4 +768,55 @@ public static void waitForNumContainersToRecover(int num, MockRM rm, Thread.sleep(200); } } + + @Test (timeout = 20000) + public void testNewContainersNotAllocatedDuringSchedulerRecovery() + throws Exception { + conf.setLong( + YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 4000); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + rm1 = new MockRM(conf, memStore); + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); + nm1.registerNode(); + RMApp app1 = rm1.submitApp(200); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // Restart RM + rm2 = new MockRM(conf, memStore); + rm2.start(); + nm1.setResourceTrackerService(rm2.getResourceTrackerService()); + nm1.registerNode(); + ControlledClock clock = new ControlledClock(new SystemClock()); + long startTime = System.currentTimeMillis(); + ((RMContextImpl)rm2.getRMContext()).setSystemClock(clock); + am1.setAMRMProtocol(rm2.getApplicationMasterService(), 
rm2.getRMContext()); + am1.registerAppAttempt(true); + rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + + // AM request for new containers + am1.allocate("127.0.0.1", 1000, 1, new ArrayList()); + + List containers = new ArrayList(); + clock.setTime(startTime + 2000); + nm1.nodeHeartbeat(true); + + // sleep some time as allocation happens asynchronously. + Thread.sleep(3000); + containers.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + // container is not allocated during scheduling recovery. + Assert.assertTrue(containers.isEmpty()); + + clock.setTime(startTime + 8000); + nm1.nodeHeartbeat(true); + // Container is created after recovery is done. + while (containers.isEmpty()) { + containers.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java index e83a6b9cc4812..78077d4fa3416 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/ahs/TestRMApplicationHistoryWriter.java @@ -60,6 +60,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairSchedulerConfiguration; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -78,6 +81,8 @@ public void setup() { store = new MemoryApplicationHistoryStore(); Configuration conf = new Configuration(); conf.setBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, true); + conf.setClass(YarnConfiguration.APPLICATION_HISTORY_STORE, + MemoryApplicationHistoryStore.class, ApplicationHistoryStore.class); writer = new RMApplicationHistoryWriter() { @Override @@ -174,6 +179,22 @@ private static RMContainer createRMContainer(ContainerId containerId) { return container; } + @Test + public void testDefaultStoreSetup() throws Exception { + Configuration conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, true); + RMApplicationHistoryWriter writer = new RMApplicationHistoryWriter(); + writer.init(conf); + writer.start(); + try { + Assert.assertFalse(writer.historyServiceEnabled); + Assert.assertNull(writer.writer); + } finally { + writer.stop(); + writer.close(); + } + } + @Test public void testWriteApplication() throws Exception { RMApp app = createRMApp(ApplicationId.newInstance(0, 1)); @@ -351,10 +372,29 @@ private boolean allEventsHandled(int expected) { } @Test - public void testRMWritingMassiveHistory() throws Exception { + public void testRMWritingMassiveHistoryForFairSche() throws Exception { + //test WritingMassiveHistory for Fair Scheduler. + testRMWritingMassiveHistory(true); + } + + @Test + public void testRMWritingMassiveHistoryForCapacitySche() throws Exception { + //test WritingMassiveHistory for Capacity Scheduler. + testRMWritingMassiveHistory(false); + } + + private void testRMWritingMassiveHistory(boolean isFS) throws Exception { // 1. 
Show RM can run with writing history data // 2. Test additional workload of processing history events YarnConfiguration conf = new YarnConfiguration(); + if (isFS) { + conf.setBoolean(FairSchedulerConfiguration.ASSIGN_MULTIPLE, true); + conf.set("yarn.resourcemanager.scheduler.class", + FairScheduler.class.getName()); + } else { + conf.set("yarn.resourcemanager.scheduler.class", + CapacityScheduler.class.getName()); + } // don't process history events MockRM rm = new MockRM(conf) { @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java index fcb4e450b0890..ba592fc9768bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java @@ -98,6 +98,9 @@ public void testAMRestartWithExistingContainers() throws Exception { Thread.sleep(200); } + ContainerId amContainerId = ContainerId.newInstance(am1 + .getApplicationAttemptId(), 1); + // launch the 2nd container, for testing running container transferred. nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING); ContainerId containerId2 = @@ -196,11 +199,15 @@ public void testAMRestartWithExistingContainers() throws Exception { // completed containerId4 is also transferred to the new attempt. RMAppAttempt newAttempt = app1.getRMAppAttempt(am2.getApplicationAttemptId()); - // 4 containers finished, acquired/allocated/reserved/completed. 
- waitForContainersToFinish(4, newAttempt); + // 4 containers finished, acquired/allocated/reserved/completed + AM + // container. + waitForContainersToFinish(5, newAttempt); boolean container3Exists = false, container4Exists = false, container5Exists = - false, container6Exists = false; + false, container6Exists = false, amContainerExists = false; for(ContainerStatus status : newAttempt.getJustFinishedContainers()) { + if(status.getContainerId().equals(amContainerId)) { + amContainerExists = true; + } if(status.getContainerId().equals(containerId3)) { // containerId3 is the container ran by previous attempt but finished by the // new attempt. @@ -220,8 +227,11 @@ public void testAMRestartWithExistingContainers() throws Exception { container6Exists = true; } } - Assert.assertTrue(container3Exists && container4Exists && container5Exists - && container6Exists); + Assert.assertTrue(amContainerExists); + Assert.assertTrue(container3Exists); + Assert.assertTrue(container4Exists); + Assert.assertTrue(container5Exists); + Assert.assertTrue(container6Exists); // New SchedulerApplicationAttempt also has the containers info. rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING); @@ -240,14 +250,14 @@ public void testAMRestartWithExistingContainers() throws Exception { // all 4 normal containers finished. 
System.out.println("New attempt's just finished containers: " + newAttempt.getJustFinishedContainers()); - waitForContainersToFinish(5, newAttempt); + waitForContainersToFinish(6, newAttempt); rm1.stop(); } private void waitForContainersToFinish(int expectedNum, RMAppAttempt attempt) throws InterruptedException { int count = 0; - while (attempt.getJustFinishedContainers().size() != expectedNum + while (attempt.getJustFinishedContainers().size() < expectedNum && count < 500) { Thread.sleep(100); count++; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index a97ae7b1efc92..63343e9521d49 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -174,7 +174,9 @@ public void testPublishAppAttemptMetrics() throws Exception { ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1); RMAppAttempt appAttempt = createRMAppAttempt(appAttemptId); metricsPublisher.appAttemptRegistered(appAttempt, Integer.MAX_VALUE + 1L); - metricsPublisher.appAttemptFinished(appAttempt, RMAppAttemptState.FINISHED, + RMApp app = mock(RMApp.class); + when(app.getFinalApplicationStatus()).thenReturn(FinalApplicationStatus.UNDEFINED); + metricsPublisher.appAttemptFinished(appAttempt, RMAppAttemptState.FINISHED, app, Integer.MAX_VALUE + 2L); TimelineEntity entity = null; do { @@ -222,7 +224,7 @@ public void 
testPublishAppAttemptMetrics() throws Exception { event.getEventInfo().get( AppAttemptMetricsConstants.ORIGINAL_TRACKING_URL_EVENT_INFO)); Assert.assertEquals( - appAttempt.getFinalApplicationStatus().toString(), + FinalApplicationStatus.UNDEFINED.toString(), event.getEventInfo().get( AppAttemptMetricsConstants.FINAL_STATUS_EVENT_INFO)); Assert.assertEquals( @@ -340,8 +342,6 @@ private static RMAppAttempt createRMAppAttempt( when(appAttempt.getTrackingUrl()).thenReturn("test tracking url"); when(appAttempt.getOriginalTrackingUrl()).thenReturn( "test original tracking url"); - when(appAttempt.getFinalApplicationStatus()).thenReturn( - FinalApplicationStatus.UNDEFINED); return appAttempt; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java index 8a2840e8632bf..a0c2b01607ba7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java @@ -112,6 +112,13 @@ public void setup() { // report "ideal" preempt conf.setFloat(TOTAL_PREEMPTION_PER_ROUND, (float) 1.0); conf.setFloat(NATURAL_TERMINATION_FACTOR, (float) 1.0); + conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, + ProportionalCapacityPreemptionPolicy.class.getCanonicalName()); + conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true); + // 
FairScheduler doesn't support this test, + // Set CapacityScheduler as the scheduler for this test. + conf.set("yarn.resourcemanager.scheduler.class", + CapacityScheduler.class.getName()); mClock = mock(Clock.class); mCS = mock(CapacityScheduler.class); @@ -441,11 +448,6 @@ public void testContainerOrdering(){ @Test public void testPolicyInitializeAfterSchedulerInitialized() { - Configuration conf = new Configuration(); - conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES, - ProportionalCapacityPreemptionPolicy.class.getCanonicalName()); - conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true); - @SuppressWarnings("resource") MockRM rm = new MockRM(conf); rm.init(conf); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 08749688b6400..457f21e061aa3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -32,8 +32,6 @@ import java.util.Collection; import java.util.Map; -import org.junit.Assert; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -75,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import 
org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -961,6 +960,9 @@ public void testGetAppReport() { Assert.assertEquals(report.getApplicationResourceUsageReport(),RMServerUtils.DUMMY_APPLICATION_RESOURCE_USAGE_REPORT); report = app.createAndGetApplicationReport("clientuser", true); Assert.assertNotNull(report.getApplicationResourceUsageReport()); + Assert.assertTrue("bad proxy url for app", + report.getTrackingUrl().endsWith("/proxy/" + app.getApplicationId() + + "/")); } private void verifyApplicationFinished(RMAppState state) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 6608ccd08e819..15028f9b00769 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -28,6 +28,7 @@ import static org.mockito.Matchers.anyLong; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -35,6 +36,7 @@ import java.net.URI; import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import 
java.util.Collections; @@ -76,6 +78,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; @@ -90,9 +93,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent; + +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -151,6 +158,7 @@ public class TestRMAppAttemptTransitions { private NMTokenSecretManagerInRM nmTokenManager = spy(new NMTokenSecretManagerInRM(conf)); private boolean transferStateFromPreviousAttempt = false; + private EventHandler rmnodeEventHandler; private final class 
TestApplicationAttemptEventDispatcher implements EventHandler { @@ -203,7 +211,7 @@ public void handle(AMLauncherEvent event) { applicationMasterLauncher.handle(event); } } - + private static int appId = 1; private ApplicationSubmissionContext submissionContext = null; @@ -268,6 +276,9 @@ public void setUp() throws Exception { rmDispatcher.register(AMLauncherEventType.class, new TestAMLauncherEventDispatcher()); + rmnodeEventHandler = mock(RMNodeImpl.class); + rmDispatcher.register(RMNodeEventType.class, rmnodeEventHandler); + rmDispatcher.init(conf); rmDispatcher.start(); @@ -289,7 +300,6 @@ public void setUp() throws Exception { Mockito.doReturn(resourceScheduler).when(spyRMContext).getScheduler(); - final String user = MockApps.newUserName(); final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); when(submissionContext.getQueue()).thenReturn(queue); @@ -576,6 +586,8 @@ private void testAppAttemptFinishedState(Container container, } assertEquals(finishedContainerCount, applicationAttempt .getJustFinishedContainers().size()); + Assert.assertEquals(0, getFinishedContainersSentToAM(applicationAttempt) + .size()); assertEquals(container, applicationAttempt.getMasterContainer()); assertEquals(finalStatus, applicationAttempt.getFinalApplicationStatus()); verifyTokenCount(applicationAttempt.getAppAttemptId(), 1); @@ -705,7 +717,8 @@ private void testUnmanagedAMSuccess(String url) { application.handle(new RMAppRunningOnNodeEvent(application.getApplicationId(), container.getNodeId())); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class))); + applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class), + container.getNodeId())); // complete AM String diagnostics = "Successful"; FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; @@ -753,10 +766,11 @@ public void testUsageReport() { 
when(appResUsgRpt.getMemorySeconds()).thenReturn(223456L); when(appResUsgRpt.getVcoreSeconds()).thenReturn(75544L); sendAttemptUpdateSavedEvent(applicationAttempt); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( attemptId, ContainerStatus.newInstance( - amContainer.getId(), ContainerState.COMPLETE, "", 0))); + amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); when(scheduler.getSchedulerAppInfo(eq(attemptId))).thenReturn(null); @@ -858,8 +872,9 @@ public void testAMCrashAtScheduled() { SchedulerUtils.LOST_CONTAINER); // send CONTAINER_FINISHED event at SCHEDULED state, // The state should be FINAL_SAVING with previous state SCHEDULED + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - applicationAttempt.getAppAttemptId(), cs)); + applicationAttempt.getAppAttemptId(), cs, anyNodeId)); // createApplicationAttemptState will return previous state (SCHEDULED), // if the current state is FINAL_SAVING. 
assertEquals(YarnApplicationAttemptState.SCHEDULED, @@ -905,8 +920,9 @@ public void testAMCrashAtAllocated() { ContainerStatus cs = BuilderUtils.newContainerStatus(amContainer.getId(), ContainerState.COMPLETE, containerDiagMsg, exitCode); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - applicationAttempt.getAppAttemptId(), cs)); + applicationAttempt.getAppAttemptId(), cs, anyNodeId)); assertEquals(YarnApplicationAttemptState.ALLOCATED, applicationAttempt.createApplicationAttemptState()); sendAttemptUpdateSavedEvent(applicationAttempt); @@ -929,16 +945,17 @@ public void testRunningToFailed() { ContainerStatus cs = BuilderUtils.newContainerStatus(amContainer.getId(), ContainerState.COMPLETE, containerDiagMsg, exitCode); ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - appAttemptId, cs)); + appAttemptId, cs, anyNodeId)); // ignored ContainerFinished and Expire at FinalSaving if we were supposed // to Failed state. 
assertEquals(RMAppAttemptState.FINAL_SAVING, - applicationAttempt.getAppAttemptState()); + applicationAttempt.getAppAttemptState()); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0))); + amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); applicationAttempt.handle(new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); assertEquals(RMAppAttemptState.FINAL_SAVING, @@ -948,7 +965,7 @@ public void testRunningToFailed() { sendAttemptUpdateSavedEvent(applicationAttempt); assertEquals(RMAppAttemptState.FAILED, applicationAttempt.getAppAttemptState()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertEquals(2, applicationAttempt.getJustFinishedContainers().size()); assertEquals(amContainer, applicationAttempt.getMasterContainer()); assertEquals(0, application.getRanNodes().size()); String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app", @@ -972,10 +989,11 @@ public void testRunningToKilled() { // ignored ContainerFinished and Expire at FinalSaving if we were supposed // to Killed state. 
assertEquals(RMAppAttemptState.FINAL_SAVING, - applicationAttempt.getAppAttemptState()); + applicationAttempt.getAppAttemptState()); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0))); + amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); applicationAttempt.handle(new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); assertEquals(RMAppAttemptState.FINAL_SAVING, @@ -985,7 +1003,7 @@ public void testRunningToKilled() { sendAttemptUpdateSavedEvent(applicationAttempt); assertEquals(RMAppAttemptState.KILLED, applicationAttempt.getAppAttemptState()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertEquals(1,applicationAttempt.getJustFinishedContainers().size()); assertEquals(amContainer, applicationAttempt.getMasterContainer()); assertEquals(0, application.getRanNodes().size()); String rmAppPageUrl = pjoin(RM_WEBAPP_ADDR, "cluster", "app", @@ -1145,13 +1163,14 @@ public void testFinishingToFinishing() { unregisterApplicationAttempt(amContainer, finalStatus, trackingUrl, diagnostics); // container must be AM container to move from FINISHING to FINISHED + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle( new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( BuilderUtils.newContainerId( applicationAttempt.getAppAttemptId(), 42), - ContainerState.COMPLETE, "", 0))); + ContainerState.COMPLETE, "", 0), anyNodeId)); testAppAttemptFinishingState(amContainer, finalStatus, trackingUrl, diagnostics); } @@ -1166,13 +1185,14 @@ public void testSuccessfulFinishingToFinished() { String diagnostics = "Successful"; unregisterApplicationAttempt(amContainer, finalStatus, trackingUrl, diagnostics); + NodeId anyNodeId = 
NodeId.newInstance("host", 1234); applicationAttempt.handle( new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(amContainer.getId(), - ContainerState.COMPLETE, "", 0))); + ContainerState.COMPLETE, "", 0), anyNodeId)); testAppAttemptFinishedState(amContainer, finalStatus, trackingUrl, - diagnostics, 0, false); + diagnostics, 1, false); } // While attempt is at FINAL_SAVING, Contaienr_Finished event may come before @@ -1196,15 +1216,16 @@ public void testSuccessfulFinishingToFinished() { assertEquals(YarnApplicationAttemptState.RUNNING, applicationAttempt.createApplicationAttemptState()); // Container_finished event comes before Attempt_Saved event. + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0))); + amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); assertEquals(RMAppAttemptState.FINAL_SAVING, applicationAttempt.getAppAttemptState()); // send attempt_saved sendAttemptUpdateSavedEvent(applicationAttempt); testAppAttemptFinishedState(amContainer, finalStatus, trackingUrl, - diagnostics, 0, false); + diagnostics, 1, false); } // While attempt is at FINAL_SAVING, Expire event may come before @@ -1236,6 +1257,71 @@ public void testFinalSavingToFinishedWithExpire() { diagnostics, 0, false); } + @Test + public void testFinishedContainer() { + Container amContainer = allocateApplicationAttempt(); + launchApplicationAttempt(amContainer); + runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false); + + // Complete one container + ContainerId containerId1 = BuilderUtils.newContainerId(applicationAttempt + .getAppAttemptId(), 2); + Container container1 = mock(Container.class); + ContainerStatus containerStatus1 = mock(ContainerStatus.class); + when(container1.getId()).thenReturn( + 
containerId1); + when(containerStatus1.getContainerId()).thenReturn(containerId1); + when(container1.getNodeId()).thenReturn(NodeId.newInstance("host", 1234)); + + application.handle(new RMAppRunningOnNodeEvent(application + .getApplicationId(), + container1.getNodeId())); + applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( + applicationAttempt.getAppAttemptId(), containerStatus1, + container1.getNodeId())); + + ArgumentCaptor captor = + ArgumentCaptor.forClass(RMNodeFinishedContainersPulledByAMEvent.class); + + // Verify justFinishedContainers + Assert.assertEquals(1, applicationAttempt.getJustFinishedContainers() + .size()); + Assert.assertEquals(container1.getId(), applicationAttempt + .getJustFinishedContainers().get(0).getContainerId()); + Assert.assertEquals(0, getFinishedContainersSentToAM(applicationAttempt) + .size()); + + // Verify finishedContainersSentToAM gets container after pull + List containerStatuses = applicationAttempt + .pullJustFinishedContainers(); + Assert.assertEquals(1, containerStatuses.size()); + Mockito.verify(rmnodeEventHandler, never()).handle(Mockito + .any(RMNodeEvent.class)); + Assert.assertTrue(applicationAttempt.getJustFinishedContainers().isEmpty()); + Assert.assertEquals(1, getFinishedContainersSentToAM(applicationAttempt) + .size()); + + // Verify container is acked to NM via the RMNodeEvent after second pull + containerStatuses = applicationAttempt.pullJustFinishedContainers(); + Assert.assertEquals(0, containerStatuses.size()); + Mockito.verify(rmnodeEventHandler).handle(captor.capture()); + Assert.assertEquals(container1.getId(), captor.getValue().getContainers() + .get(0)); + Assert.assertTrue(applicationAttempt.getJustFinishedContainers().isEmpty()); + Assert.assertEquals(0, getFinishedContainersSentToAM(applicationAttempt) + .size()); + } + + private static List getFinishedContainersSentToAM( + RMAppAttempt applicationAttempt) { + List containers = new ArrayList(); + for (List containerStatuses: 
applicationAttempt + .getFinishedContainersSentToAMReference().values()) { + containers.addAll(containerStatuses); + } + return containers; + } + // this is to test user can get client tokens only after the client token // master key is saved in the state store and also registered in // ClientTokenSecretManager @@ -1282,8 +1368,9 @@ public void testFailedToFailed() { ContainerStatus.newInstance(amContainer.getId(), ContainerState.COMPLETE, "some error", 123); ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - appAttemptId, cs1)); + appAttemptId, cs1, anyNodeId)); assertEquals(YarnApplicationAttemptState.RUNNING, applicationAttempt.createApplicationAttemptState()); sendAttemptUpdateSavedEvent(applicationAttempt); @@ -1294,15 +1381,21 @@ public void testFailedToFailed() { verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); // failed attempt captured the container finished event. 
- assertEquals(0, applicationAttempt.getJustFinishedContainers().size()); + assertEquals(1, applicationAttempt.getJustFinishedContainers().size()); ContainerStatus cs2 = ContainerStatus.newInstance(ContainerId.newInstance(appAttemptId, 2), ContainerState.COMPLETE, "", 0); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - appAttemptId, cs2)); - assertEquals(1, applicationAttempt.getJustFinishedContainers().size()); - assertEquals(cs2.getContainerId(), applicationAttempt - .getJustFinishedContainers().get(0).getContainerId()); + appAttemptId, cs2, anyNodeId)); + assertEquals(2, applicationAttempt.getJustFinishedContainers().size()); + boolean found = false; + for (ContainerStatus containerStatus:applicationAttempt + .getJustFinishedContainers()) { + if (cs2.getContainerId().equals(containerStatus.getContainerId())) { + found = true; + } + } + assertTrue(found); } @@ -1323,8 +1416,9 @@ scheduler, masterService, submissionContext, new Configuration(), ContainerStatus.newInstance(amContainer.getId(), ContainerState.COMPLETE, "some error", 123); ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); + NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( - appAttemptId, cs1)); + appAttemptId, cs1, anyNodeId)); assertEquals(YarnApplicationAttemptState.RUNNING, applicationAttempt.createApplicationAttemptState()); sendAttemptUpdateSavedEvent(applicationAttempt); @@ -1385,7 +1479,7 @@ private void verifyApplicationAttemptFinished(RMAppAttemptState state) { finalState = ArgumentCaptor.forClass(RMAppAttemptState.class); verify(publisher).appAttemptFinished(any(RMAppAttempt.class), finalState.capture(), - anyLong()); + any(RMApp.class), anyLong()); Assert.assertEquals(state, finalState.getValue()); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java index 0ab1f70147b84..f560690d935f7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSAppAttempt.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import static org.junit.Assert.assertEquals; @@ -26,7 +27,12 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -185,4 +191,61 @@ public void testLocalityLevelWithoutDelays() { assertEquals(NodeType.OFF_SWITCH, schedulerApp.getAllowedLocalityLevel( prio, 10, -1.0, 
-1.0)); } + + @Test + public void testHeadroom() { + final FairScheduler mockScheduler = Mockito.mock(FairScheduler.class); + Mockito.when(mockScheduler.getClock()).thenReturn(scheduler.getClock()); + + final FSLeafQueue mockQueue = Mockito.mock(FSLeafQueue.class); + final Resource queueFairShare = Resources.createResource(4096, 4); + final Resource queueUsage = Resource.newInstance(1024, 1); + final Resource clusterResource = Resources.createResource(8192, 8); + final Resource clusterUsage = Resources.createResource(6144, 2); + final QueueMetrics fakeRootQueueMetrics = Mockito.mock(QueueMetrics.class); + + ApplicationAttemptId applicationAttemptId = createAppAttemptId(1, 1); + RMContext rmContext = resourceManager.getRMContext(); + FSAppAttempt schedulerApp = + new FSAppAttempt(mockScheduler, applicationAttemptId, "user1", mockQueue , + null, rmContext); + + Mockito.when(mockQueue.getFairShare()).thenReturn(queueFairShare); + Mockito.when(mockQueue.getResourceUsage()).thenReturn(queueUsage); + Mockito.when(mockScheduler.getClusterResource()).thenReturn + (clusterResource); + Mockito.when(fakeRootQueueMetrics.getAllocatedResources()).thenReturn + (clusterUsage); + Mockito.when(mockScheduler.getRootQueueMetrics()).thenReturn + (fakeRootQueueMetrics); + + int minClusterAvailableMemory = 2048; + int minClusterAvailableCPU = 6; + int minQueueAvailableCPU = 3; + + // Min of Memory and CPU across cluster and queue is used in + // DominantResourceFairnessPolicy + Mockito.when(mockQueue.getPolicy()).thenReturn(SchedulingPolicy + .getInstance(DominantResourceFairnessPolicy.class)); + verifyHeadroom(schedulerApp, minClusterAvailableMemory, + minQueueAvailableCPU); + + // Fair and Fifo ignore CPU of queue, so use cluster available CPU + Mockito.when(mockQueue.getPolicy()).thenReturn(SchedulingPolicy + .getInstance(FairSharePolicy.class)); + verifyHeadroom(schedulerApp, minClusterAvailableMemory, + minClusterAvailableCPU); + + 
Mockito.when(mockQueue.getPolicy()).thenReturn(SchedulingPolicy + .getInstance(FifoPolicy.class)); + verifyHeadroom(schedulerApp, minClusterAvailableMemory, + minClusterAvailableCPU); + } + + protected void verifyHeadroom(FSAppAttempt schedulerApp, + int expectedMemory, int expectedCPU) { + Resource headroom = schedulerApp.getHeadroom(); + assertEquals(expectedMemory, headroom.getMemory()); + assertEquals(expectedCPU, headroom.getVirtualCores()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 1c9b2894fc315..67164c6c0f5f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -131,8 +131,14 @@ public void setUp() throws IOException { @After public void tearDown() { - scheduler = null; - resourceManager = null; + if (scheduler != null) { + scheduler.stop(); + scheduler = null; + } + if (resourceManager != null) { + resourceManager.stop(); + resourceManager = null; + } QueueMetrics.clearQueueMetrics(); DefaultMetricsSystem.shutdown(); } @@ -140,7 +146,7 @@ public void tearDown() { @Test (timeout = 30000) public void testConfValidation() throws Exception { - FairScheduler scheduler = new FairScheduler(); + scheduler = new FairScheduler(); Configuration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 2048); 
conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 1024); @@ -212,7 +218,7 @@ public void testLoadConfigurationOnInitialize() throws IOException { @Test public void testNonMinZeroResourcesSettings() throws IOException { - FairScheduler fs = new FairScheduler(); + scheduler = new FairScheduler(); YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 256); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 1); @@ -220,17 +226,17 @@ public void testNonMinZeroResourcesSettings() throws IOException { FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); - fs.init(conf); - fs.reinitialize(conf, null); - Assert.assertEquals(256, fs.getMinimumResourceCapability().getMemory()); - Assert.assertEquals(1, fs.getMinimumResourceCapability().getVirtualCores()); - Assert.assertEquals(512, fs.getIncrementResourceCapability().getMemory()); - Assert.assertEquals(2, fs.getIncrementResourceCapability().getVirtualCores()); + scheduler.init(conf); + scheduler.reinitialize(conf, null); + Assert.assertEquals(256, scheduler.getMinimumResourceCapability().getMemory()); + Assert.assertEquals(1, scheduler.getMinimumResourceCapability().getVirtualCores()); + Assert.assertEquals(512, scheduler.getIncrementResourceCapability().getMemory()); + Assert.assertEquals(2, scheduler.getIncrementResourceCapability().getVirtualCores()); } @Test public void testMinZeroResourcesSettings() throws IOException { - FairScheduler fs = new FairScheduler(); + scheduler = new FairScheduler(); YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 0); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 0); @@ -238,12 +244,12 @@ public void testMinZeroResourcesSettings() throws IOException { 
FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_MB, 512); conf.setInt( FairSchedulerConfiguration.RM_SCHEDULER_INCREMENT_ALLOCATION_VCORES, 2); - fs.init(conf); - fs.reinitialize(conf, null); - Assert.assertEquals(0, fs.getMinimumResourceCapability().getMemory()); - Assert.assertEquals(0, fs.getMinimumResourceCapability().getVirtualCores()); - Assert.assertEquals(512, fs.getIncrementResourceCapability().getMemory()); - Assert.assertEquals(2, fs.getIncrementResourceCapability().getVirtualCores()); + scheduler.init(conf); + scheduler.reinitialize(conf, null); + Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getMemory()); + Assert.assertEquals(0, scheduler.getMinimumResourceCapability().getVirtualCores()); + Assert.assertEquals(512, scheduler.getIncrementResourceCapability().getMemory()); + Assert.assertEquals(2, scheduler.getIncrementResourceCapability().getVirtualCores()); } @Test @@ -3128,7 +3134,7 @@ public void testQueueMaxAMShareDefault() throws Exception { out.println(""); out.println(""); out.println(""); - out.println("1.0"); + out.println("0.4"); out.println(""); out.println(""); out.println(""); @@ -3172,40 +3178,42 @@ public void testQueueMaxAMShareDefault() throws Exception { assertEquals("Queue queue5's fair share should be 0", 0, queue5 .getFairShare().getMemory()); - List queues = Arrays.asList("root.default", "root.queue3", - "root.queue4", "root.queue5"); + List queues = Arrays.asList("root.queue3", "root.queue4", + "root.queue5"); for (String queue : queues) { createSchedulingRequest(1 * 1024, queue, "user1"); scheduler.update(); scheduler.handle(updateEvent); } - Resource amResource1 = Resource.newInstance(2048, 1); + Resource amResource1 = Resource.newInstance(1024, 1); int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority(); - // Exceeds queue limit, but default maxAMShare is -1.0 so it doesn't matter + // The fair share is 2048 MB, and the default maxAMShare is 0.5f, + // so the AM is accepted. 
ApplicationAttemptId attId1 = createAppAttemptId(1, 1); createApplicationWithAMResource(attId1, "queue1", "test1", amResource1); - createSchedulingRequestExistingApplication(2048, 1, amPriority, attId1); + createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1); FSAppAttempt app1 = scheduler.getSchedulerApp(attId1); scheduler.update(); scheduler.handle(updateEvent); - assertEquals("Application1's AM requests 2048 MB memory", - 2048, app1.getAMResource().getMemory()); + assertEquals("Application1's AM requests 1024 MB memory", + 1024, app1.getAMResource().getMemory()); assertEquals("Application1's AM should be running", 1, app1.getLiveContainers().size()); - assertEquals("Queue1's AM resource usage should be 2048 MB memory", - 2048, queue1.getAmResourceUsage().getMemory()); + assertEquals("Queue1's AM resource usage should be 1024 MB memory", + 1024, queue1.getAmResourceUsage().getMemory()); - // Exceeds queue limit, and maxAMShare is 1.0 + // Now the fair share is 1639 MB, and the maxAMShare is 0.4f, + // so the AM is not accepted. 
ApplicationAttemptId attId2 = createAppAttemptId(2, 1); createApplicationWithAMResource(attId2, "queue2", "test1", amResource1); - createSchedulingRequestExistingApplication(2048, 1, amPriority, attId2); + createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2); FSAppAttempt app2 = scheduler.getSchedulerApp(attId2); scheduler.update(); scheduler.handle(updateEvent); - assertEquals("Application2's AM requests 2048 MB memory", - 2048, app2.getAMResource().getMemory()); + assertEquals("Application2's AM requests 1024 MB memory", + 1024, app2.getAMResource().getMemory()); assertEquals("Application2's AM should not be running", 0, app2.getLiveContainers().size()); assertEquals("Queue2's AM resource usage should be 0 MB memory", @@ -3291,49 +3299,49 @@ public void testMaxRunningAppsHierarchicalQueues() throws Exception { @Test (timeout = 10000) public void testContinuousScheduling() throws Exception { // set continuous scheduling enabled - FairScheduler fs = new FairScheduler(); + scheduler = new FairScheduler(); Configuration conf = createConfiguration(); conf.setBoolean(FairSchedulerConfiguration.CONTINUOUS_SCHEDULING_ENABLED, true); - fs.setRMContext(resourceManager.getRMContext()); - fs.init(conf); - fs.start(); - fs.reinitialize(conf, resourceManager.getRMContext()); + scheduler.setRMContext(resourceManager.getRMContext()); + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); Assert.assertTrue("Continuous scheduling should be enabled.", - fs.isContinuousSchedulingEnabled()); + scheduler.isContinuousSchedulingEnabled()); // Add two nodes RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); - fs.handle(nodeEvent1); + scheduler.handle(nodeEvent1); RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 2, "127.0.0.2"); NodeAddedSchedulerEvent nodeEvent2 = new 
NodeAddedSchedulerEvent(node2); - fs.handle(nodeEvent2); + scheduler.handle(nodeEvent2); // available resource - Assert.assertEquals(fs.getClusterResource().getMemory(), 16 * 1024); - Assert.assertEquals(fs.getClusterResource().getVirtualCores(), 16); + Assert.assertEquals(scheduler.getClusterResource().getMemory(), 16 * 1024); + Assert.assertEquals(scheduler.getClusterResource().getVirtualCores(), 16); // send application request ApplicationAttemptId appAttemptId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", false); - fs.addApplicationAttempt(appAttemptId, false, false); + scheduler.addApplication(appAttemptId.getApplicationId(), "queue11", "user11", false); + scheduler.addApplicationAttempt(appAttemptId, false, false); List ask = new ArrayList(); ResourceRequest request = createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); ask.add(request); - fs.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); // waiting for continuous_scheduler_sleep_time // at least one pass - Thread.sleep(fs.getConf().getContinuousSchedulingSleepMs() + 500); + Thread.sleep(scheduler.getConf().getContinuousSchedulingSleepMs() + 500); - FSAppAttempt app = fs.getSchedulerApp(appAttemptId); + FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId); // Wait until app gets resources. 
while (app.getCurrentConsumption().equals(Resources.none())) { } @@ -3346,7 +3354,7 @@ public void testContinuousScheduling() throws Exception { createResourceRequest(1024, 1, ResourceRequest.ANY, 2, 1, true); ask.clear(); ask.add(request); - fs.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); // Wait until app gets resources while (app.getCurrentConsumption() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java index b3dc35f3cfd2f..a0d5d84bdc43c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java @@ -161,7 +161,7 @@ public void testTokenExpiry() throws Exception { .getEventHandler() .handle( new RMAppAttemptContainerFinishedEvent(applicationAttemptId, - containerStatus)); + containerStatus, nm1.getNodeId())); // Make sure the RMAppAttempt is at Finished State. // Both AMRMToken and ClientToAMToken have been removed. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java index 6797165dfe09f..3f82d72abc617 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java @@ -27,6 +27,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.LinkedList; import java.util.List; import org.apache.commons.logging.Log; @@ -49,6 +50,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -77,6 +79,9 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; +import com.google.common.io.ByteArrayDataInput; +import com.google.common.io.ByteStreams; + @RunWith(Parameterized.class) public class TestContainerManagerSecurity extends KerberosSecurityTestcase { @@ -134,7 +139,7 @@ public TestContainerManagerSecurity(Configuration conf) { this.conf = conf; } - @Test (timeout = 1000000) + @Test (timeout = 120000) public void testContainerManager() throws Exception { try { yarnCluster = new MiniYARNCluster(TestContainerManagerSecurity.class @@ -158,6 +163,25 @@ public void testContainerManager() throws Exception { } } } + + @Test (timeout = 120000) + public 
void testContainerManagerWithEpoch() throws Exception { + try { + yarnCluster = new MiniYARNCluster(TestContainerManagerSecurity.class + .getName(), 1, 1, 1); + yarnCluster.init(conf); + yarnCluster.start(); + + // Testing for container token tampering + testContainerTokenWithEpoch(conf); + + } finally { + if (yarnCluster != null) { + yarnCluster.stop(); + yarnCluster = null; + } + } + } private void testNMTokens(Configuration conf) throws Exception { NMTokenSecretManagerInRM nmTokenSecretManagerRM = @@ -289,7 +313,7 @@ private void testNMTokens(Configuration conf) throws Exception { // trying to stop the container. It should not throw any exception. testStopContainer(rpc, validAppAttemptId, validNode, validContainerId, validNMToken, false); - + // Rolling over master key twice so that we can check whether older keys // are used for authentication. rollNMTokenMasterKey(nmTokenSecretManagerRM, nmTokenSecretManagerNM); @@ -304,7 +328,7 @@ private void testNMTokens(Configuration conf) throws Exception { sb.append(" was recently stopped on node manager"); Assert.assertTrue(testGetContainer(rpc, validAppAttemptId, validNode, validContainerId, validNMToken, true).contains(sb.toString())); - + // Now lets remove the container from nm-memory nm.getNodeStatusUpdater().clearFinishedContainersFromCache(); @@ -333,14 +357,22 @@ private void testNMTokens(Configuration conf) throws Exception { private void waitForContainerToFinishOnNM(ContainerId containerId) { Context nmContet = yarnCluster.getNodeManager(0).getNMContext(); int interval = 4 * 60; // Max time for container token to expire. 
+ Assert.assertTrue(nmContet.getContainers().containsKey(containerId)); while ((interval-- > 0) - && nmContet.getContainers().containsKey(containerId)) { + && !nmContet.getContainers().get(containerId) + .cloneAndGetContainerStatus().getState() + .equals(ContainerState.COMPLETE)) { try { + LOG.info("Waiting for " + containerId + " to complete."); Thread.sleep(1000); } catch (InterruptedException e) { } } - Assert.assertFalse(nmContet.getContainers().containsKey(containerId)); + // Normally, Containers will be removed from NM context after they are + // explicitly acked by RM. Now, manually remove it for testing. + yarnCluster.getNodeManager(0).getNodeStatusUpdater() + .addCompletedContainer(containerId); + nmContet.getContainers().remove(containerId); } protected void waitForNMToReceiveNMTokenKey( @@ -603,4 +635,74 @@ private void testContainerToken(Configuration conf) throws IOException, Assert.assertTrue(testStartContainer(rpc, appAttemptId, nodeId, containerToken, nmToken, true).contains(sb.toString())); } + + /** + * This tests whether a containerId is serialized/deserialized with epoch. 
+ * + * @throws IOException + * @throws InterruptedException + * @throws YarnException + */ + private void testContainerTokenWithEpoch(Configuration conf) + throws IOException, InterruptedException, YarnException { + + LOG.info("Running test for serializing/deserializing containerIds"); + + NMTokenSecretManagerInRM nmTokenSecretManagerInRM = + yarnCluster.getResourceManager().getRMContext() + .getNMTokenSecretManager(); + ApplicationId appId = ApplicationId.newInstance(1, 1); + ApplicationAttemptId appAttemptId = + ApplicationAttemptId.newInstance(appId, 0); + ContainerId cId = ContainerId.newInstance(appAttemptId, (5L << 40) | 3L); + NodeManager nm = yarnCluster.getNodeManager(0); + NMTokenSecretManagerInNM nmTokenSecretManagerInNM = + nm.getNMContext().getNMTokenSecretManager(); + String user = "test"; + + waitForNMToReceiveNMTokenKey(nmTokenSecretManagerInNM, nm); + + NodeId nodeId = nm.getNMContext().getNodeId(); + + // Both id should be equal. + Assert.assertEquals(nmTokenSecretManagerInNM.getCurrentKey().getKeyId(), + nmTokenSecretManagerInRM.getCurrentKey().getKeyId()); + + // Creating a normal Container Token + RMContainerTokenSecretManager containerTokenSecretManager = + yarnCluster.getResourceManager().getRMContext(). 
+ getContainerTokenSecretManager(); + Resource r = Resource.newInstance(1230, 2); + Token containerToken = + containerTokenSecretManager.createContainerToken(cId, nodeId, user, r, + Priority.newInstance(0), 0); + + ByteArrayDataInput input = ByteStreams.newDataInput( + containerToken.getIdentifier().array()); + ContainerTokenIdentifier containerTokenIdentifier = + new ContainerTokenIdentifier(); + containerTokenIdentifier.readFields(input); + Assert.assertEquals(cId, containerTokenIdentifier.getContainerID()); + Assert.assertEquals( + cId.toString(), containerTokenIdentifier.getContainerID().toString()); + + Token nmToken = + nmTokenSecretManagerInRM.createNMToken(appAttemptId, nodeId, user); + + YarnRPC rpc = YarnRPC.create(conf); + testStartContainer(rpc, appAttemptId, nodeId, containerToken, nmToken, + false); + + List containerIds = new LinkedList(); + containerIds.add(cId); + ContainerManagementProtocol proxy + = getContainerManagementProtocolProxy(rpc, nmToken, nodeId, user); + GetContainerStatusesResponse res = proxy.getContainerStatuses( + GetContainerStatusesRequest.newInstance(containerIds)); + Assert.assertNotNull(res.getContainerStatuses().get(0)); + Assert.assertEquals( + cId, res.getContainerStatuses().get(0).getContainerId()); + Assert.assertEquals(cId.toString(), + res.getContainerStatuses().get(0).getContainerId().toString()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm index df6142283813a..dcf94869cf2b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm @@ -246,8 +246,8 @@ Allocation file format * maxAMShare: limit the fraction of the queue's fair share that can be used to run application masters. This property can only be used for leaf queues. 
For example, if set to 1.0f, then AMs in the leaf queue can take up to 100% - of both the memory and CPU fair share. The default value is -1.0f, which - means that this check is disabled. + of both the memory and CPU fair share. A value of -1.0f disables this + feature, and the amShare will not be checked. The default value is 0.5f. * weight: to share the cluster non-proportionally with other queues. Weights default to 1, and a queue with weight 2 should receive approximately twice