diff --git a/.asf.yaml b/.asf.yaml
index 91483dfed336c..e1c11790c8bfb 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -22,4 +22,4 @@ notifications:
commits: common-commits@hadoop.apache.org
issues: common-issues@hadoop.apache.org
pullrequests: common-issues@hadoop.apache.org
- jira_options: link label worklog
\ No newline at end of file
+ jira_options: comment link label
\ No newline at end of file
diff --git a/LICENSE-binary b/LICENSE-binary
index 499485263906a..d9762b14c34a5 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -215,15 +215,15 @@ com.aliyun:aliyun-java-sdk-ecs:4.2.0
com.aliyun:aliyun-java-sdk-ram:3.0.0
com.aliyun:aliyun-java-sdk-sts:3.0.0
com.aliyun.oss:aliyun-sdk-oss:3.13.2
-com.amazonaws:aws-java-sdk-bundle:1.11.901
+com.amazonaws:aws-java-sdk-bundle:1.12.262
com.cedarsoftware:java-util:1.9.0
com.cedarsoftware:json-io:2.5.1
-com.fasterxml.jackson.core:jackson-annotations:2.13.2
-com.fasterxml.jackson.core:jackson-core:2.13.2
-com.fasterxml.jackson.core:jackson-databind:2.13.2.2
-com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.13.2
-com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.13.2
-com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.13.2
+com.fasterxml.jackson.core:jackson-annotations:2.12.7
+com.fasterxml.jackson.core:jackson-core:2.12.7
+com.fasterxml.jackson.core:jackson-databind:2.12.7
+com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.12.7
+com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.12.7
+com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7
com.fasterxml.uuid:java-uuid-generator:3.1.4
com.fasterxml.woodstox:woodstox-core:5.3.0
com.github.davidmoten:rxjava-extras:0.8.0.17
@@ -251,7 +251,7 @@ commons-collections:commons-collections:3.2.2
commons-daemon:commons-daemon:1.0.13
commons-io:commons-io:2.8.0
commons-logging:commons-logging:1.1.3
-commons-net:commons-net:3.6
+commons-net:commons-net:3.8.0
de.ruedigermoeller:fst:2.50
io.grpc:grpc-api:1.26.0
io.grpc:grpc-context:1.26.0
@@ -261,17 +261,36 @@ io.grpc:grpc-protobuf:1.26.0
io.grpc:grpc-protobuf-lite:1.26.0
io.grpc:grpc-stub:1.26.0
io.netty:netty:3.10.6.Final
-io.netty:netty-all:4.1.42.Final
-io.netty:netty-buffer:4.1.27.Final
-io.netty:netty-codec:4.1.27.Final
-io.netty:netty-codec-http:4.1.27.Final
-io.netty:netty-codec-http2:4.1.27.Final
-io.netty:netty-codec-socks:4.1.27.Final
-io.netty:netty-common:4.1.27.Final
-io.netty:netty-handler:4.1.27.Final
-io.netty:netty-handler-proxy:4.1.27.Final
-io.netty:netty-resolver:4.1.27.Final
-io.netty:netty-transport:4.1.27.Final
+io.netty:netty-all:4.1.77.Final
+io.netty:netty-buffer:4.1.77.Final
+io.netty:netty-codec:4.1.77.Final
+io.netty:netty-codec-dns:4.1.77.Final
+io.netty:netty-codec-haproxy:4.1.77.Final
+io.netty:netty-codec-http:4.1.77.Final
+io.netty:netty-codec-http2:4.1.77.Final
+io.netty:netty-codec-memcache:4.1.77.Final
+io.netty:netty-codec-mqtt:4.1.77.Final
+io.netty:netty-codec-redis:4.1.77.Final
+io.netty:netty-codec-smtp:4.1.77.Final
+io.netty:netty-codec-socks:4.1.77.Final
+io.netty:netty-codec-stomp:4.1.77.Final
+io.netty:netty-codec-xml:4.1.77.Final
+io.netty:netty-common:4.1.77.Final
+io.netty:netty-handler:4.1.77.Final
+io.netty:netty-handler-proxy:4.1.77.Final
+io.netty:netty-resolver:4.1.77.Final
+io.netty:netty-resolver-dns:4.1.77.Final
+io.netty:netty-transport:4.1.77.Final
+io.netty:netty-transport-rxtx:4.1.77.Final
+io.netty:netty-transport-sctp:4.1.77.Final
+io.netty:netty-transport-udt:4.1.77.Final
+io.netty:netty-transport-classes-epoll:4.1.77.Final
+io.netty:netty-transport-native-unix-common:4.1.77.Final
+io.netty:netty-transport-classes-kqueue:4.1.77.Final
+io.netty:netty-resolver-dns-classes-macos:4.1.77.Final
+io.netty:netty-transport-native-epoll:4.1.77.Final
+io.netty:netty-transport-native-kqueue:4.1.77.Final
+io.netty:netty-resolver-dns-native-macos:4.1.77.Final
io.opencensus:opencensus-api:0.12.3
io.opencensus:opencensus-contrib-grpc-metrics:0.12.3
io.reactivex:rxjava:1.3.8
@@ -290,7 +309,7 @@ org.apache.commons:commons-configuration2:2.1.1
org.apache.commons:commons-csv:1.0
org.apache.commons:commons-digester:1.8.1
org.apache.commons:commons-lang3:3.12.0
-org.apache.commons:commons-math3:3.1.1
+org.apache.commons:commons-math3:3.6.1
org.apache.commons:commons-text:1.4
org.apache.commons:commons-validator:1.6
org.apache.curator:curator-client:5.2.0
@@ -306,44 +325,44 @@ org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.6
org.apache.httpcomponents:httpcore:4.4.10
org.apache.kafka:kafka-clients:2.8.1
-org.apache.kerby:kerb-admin:1.0.1
-org.apache.kerby:kerb-client:1.0.1
-org.apache.kerby:kerb-common:1.0.1
-org.apache.kerby:kerb-core:1.0.1
-org.apache.kerby:kerb-crypto:1.0.1
-org.apache.kerby:kerb-identity:1.0.1
-org.apache.kerby:kerb-server:1.0.1
-org.apache.kerby:kerb-simplekdc:1.0.1
-org.apache.kerby:kerb-util:1.0.1
-org.apache.kerby:kerby-asn1:1.0.1
-org.apache.kerby:kerby-config:1.0.1
-org.apache.kerby:kerby-pkix:1.0.1
-org.apache.kerby:kerby-util:1.0.1
-org.apache.kerby:kerby-xdr:1.0.1
-org.apache.kerby:token-provider:1.0.1
+org.apache.kerby:kerb-admin:2.0.2
+org.apache.kerby:kerb-client:2.0.2
+org.apache.kerby:kerb-common:2.0.2
+org.apache.kerby:kerb-core:2.0.2
+org.apache.kerby:kerb-crypto:2.0.2
+org.apache.kerby:kerb-identity:2.0.2
+org.apache.kerby:kerb-server:2.0.2
+org.apache.kerby:kerb-simplekdc:2.0.2
+org.apache.kerby:kerb-util:2.0.2
+org.apache.kerby:kerby-asn1:2.0.2
+org.apache.kerby:kerby-config:2.0.2
+org.apache.kerby:kerby-pkix:2.0.2
+org.apache.kerby:kerby-util:2.0.2
+org.apache.kerby:kerby-xdr:2.0.2
+org.apache.kerby:token-provider:2.0.2
org.apache.solr:solr-solrj:8.8.2
org.apache.yetus:audience-annotations:0.5.0
org.apache.zookeeper:zookeeper:3.6.3
org.codehaus.jettison:jettison:1.1
-org.eclipse.jetty:jetty-annotations:9.4.44.v20210927
-org.eclipse.jetty:jetty-http:9.4.44.v20210927
-org.eclipse.jetty:jetty-io:9.4.44.v20210927
-org.eclipse.jetty:jetty-jndi:9.4.44.v20210927
-org.eclipse.jetty:jetty-plus:9.4.44.v20210927
-org.eclipse.jetty:jetty-security:9.4.44.v20210927
-org.eclipse.jetty:jetty-server:9.4.44.v20210927
-org.eclipse.jetty:jetty-servlet:9.4.44.v20210927
-org.eclipse.jetty:jetty-util:9.4.44.v20210927
-org.eclipse.jetty:jetty-util-ajax:9.4.44.v20210927
-org.eclipse.jetty:jetty-webapp:9.4.44.v20210927
-org.eclipse.jetty:jetty-xml:9.4.44.v20210927
-org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.44.v20210927
-org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.44.v20210927
+org.eclipse.jetty:jetty-annotations:9.4.48.v20220622
+org.eclipse.jetty:jetty-http:9.4.48.v20220622
+org.eclipse.jetty:jetty-io:9.4.48.v20220622
+org.eclipse.jetty:jetty-jndi:9.4.48.v20220622
+org.eclipse.jetty:jetty-plus:9.4.48.v20220622
+org.eclipse.jetty:jetty-security:9.4.48.v20220622
+org.eclipse.jetty:jetty-server:9.4.48.v20220622
+org.eclipse.jetty:jetty-servlet:9.4.48.v20220622
+org.eclipse.jetty:jetty-util:9.4.48.v20220622
+org.eclipse.jetty:jetty-util-ajax:9.4.48.v20220622
+org.eclipse.jetty:jetty-webapp:9.4.48.v20220622
+org.eclipse.jetty:jetty-xml:9.4.48.v20220622
+org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622
+org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622
org.ehcache:ehcache:3.3.1
org.lz4:lz4-java:1.7.1
org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.0.5
-org.yaml:snakeyaml:1.16:
+org.yaml:snakeyaml:1.31:
org.wildfly.openssl:wildfly-openssl:1.0.7.Final
@@ -416,7 +435,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage
bootstrap v3.3.6
broccoli-asset-rev v2.4.2
broccoli-funnel v1.0.1
-datatables v1.10.8
+datatables v1.10.19
em-helpers v0.5.13
em-table v0.1.6
ember v2.2.0
@@ -491,7 +510,6 @@ javax.annotation:javax.annotation-api:1.3.2
javax.servlet:javax.servlet-api:3.1.0
javax.servlet.jsp:jsp-api:2.1
javax.websocket:javax.websocket-api:1.0
-javax.ws.rs:javax.ws.rs-api:2.1.1
javax.ws.rs:jsr311-api:1.1.1
javax.xml.bind:jaxb-api:2.2.11
diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml
index 82f2f8a778087..b4b81011eb517 100644
--- a/hadoop-client-modules/hadoop-client-api/pom.xml
+++ b/hadoop-client-modules/hadoop-client-api/pom.xml
@@ -98,13 +98,6 @@
truetrue
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -254,8 +247,7 @@
-
-
+ NOTICE.txt
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
index 4c8900dc2af0d..208345d5f5a53 100644
--- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml
+++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
@@ -671,13 +671,6 @@
org.apache.maven.plugins
maven-shade-plugin
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -704,7 +697,6 @@
org.bouncycastle:*
org.xerial.snappy:*
- javax.ws.rs:javax.ws.rs-api
@@ -1053,8 +1045,7 @@
-
-
+
diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml
index 98756c2439544..b2bd7a4fc43c2 100644
--- a/hadoop-client-modules/hadoop-client-runtime/pom.xml
+++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml
@@ -128,13 +128,6 @@
org.apache.maven.plugins
maven-shade-plugin
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -163,7 +156,6 @@
org.bouncycastle:*
org.xerial.snappy:*
- javax.ws.rs:javax.ws.rs-api
@@ -398,8 +390,7 @@
-->
-
-
+
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml
new file mode 100644
index 0000000000000..10a4f0d5f16e5
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml
@@ -0,0 +1,35426 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKeys
+ @param customMessage
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKey
+ @param customMessage]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
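As a hedged illustration of the aliasing described above (the key names are made up; only the public Configuration deprecation API is assumed):

  import org.apache.hadoop.conf.Configuration;

  public class DeprecationDemo {
    public static void main(String[] args) {
      // Register deprecations once, up front; batch registration is cheaper
      // than many individual addDeprecation() calls.
      Configuration.addDeprecations(new Configuration.DeprecationDelta[] {
          new Configuration.DeprecationDelta("old.key", "new.key")
      });

      Configuration conf = new Configuration();
      conf.set("old.key", "v1");                   // setting the deprecated name...
      System.out.println(conf.get("new.key"));     // ...is visible under the new name: v1
      System.out.println(Configuration.isDeprecated("old.key")); // true
    }
  }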
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
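A small hedged sketch of the human-readable long form described above, with a made-up property name:

  import org.apache.hadoop.conf.Configuration;

  public class LongBytesDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      conf.set("demo.buffer.size", "64m");             // binary prefix: 64 * 1024 * 1024
      long bytes = conf.getLongBytes("demo.buffer.size", 4096);
      System.out.println(bytes);                       // 67108864; 4096 applies only if the key is unset
    }
  }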
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
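For example (hypothetical property name), a sketch of storing and reading back a time duration:

  import java.util.concurrent.TimeUnit;
  import org.apache.hadoop.conf.Configuration;

  public class TimeDurationDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      conf.setTimeDuration("demo.heartbeat.interval", 30, TimeUnit.SECONDS); // stored as "30s"
      long ms = conf.getTimeDuration("demo.heartbeat.interval",
                                     10_000, TimeUnit.MILLISECONDS);
      System.out.println(ms); // 30000; a bare number would be read in the unit passed here
    }
  }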
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
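A brief sketch, with an invented property name, of the trimmed-string accessors described above:

  import java.util.Arrays;
  import org.apache.hadoop.conf.Configuration;

  public class StringsDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      conf.setStrings("demo.hosts", "host1", " host2 ", "host3"); // stored comma-separated
      String[] trimmed = conf.getTrimmedStrings("demo.hosts");    // leading/trailing whitespace stripped
      System.out.println(Arrays.toString(trimmed));               // [host1, host2, host3]
    }
  }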
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
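A minimal sketch of reading a host:port property as an InetSocketAddress (the property name and host are illustrative):

  import java.net.InetSocketAddress;
  import org.apache.hadoop.conf.Configuration;

  public class SocketAddrDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      conf.set("demo.service.address", "node1.example.com:8020");
      // Falls back to the given default address/port when the key is unset.
      InetSocketAddress addr =
          conf.getSocketAddr("demo.service.address", "0.0.0.0:8020", 8020);
      System.out.println(addr.getHostName() + ":" + addr.getPort());
    }
  }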
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
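A hedged sketch of the interface-constrained class lookup described above, using CompressionCodec as the example constraint (the property name is made up):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.io.compress.CompressionCodec;
  import org.apache.hadoop.io.compress.GzipCodec;
  import org.apache.hadoop.util.ReflectionUtils;

  public class GetClassDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      // The configured class must implement CompressionCodec, otherwise a
      // RuntimeException is thrown; GzipCodec is used when the key is unset.
      Class<? extends CompressionCodec> codecClass =
          conf.getClass("demo.codec.class", GzipCodec.class, CompressionCodec.class);
      CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
      System.out.println(codec.getDefaultExtension());   // ".gz" for the default
    }
  }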
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param out the writer to write to.]]>
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
+ core-default.xml: Read-only defaults for hadoop.
+ core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
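A short sketch of the variable expansion described above; the property names and paths are illustrative:

  import org.apache.hadoop.conf.Configuration;

  public class ExpansionDemo {
    public static void main(String[] args) {
      Configuration conf = new Configuration();
      conf.set("basedir", "/home/user/work");
      conf.set("tempdir", "${basedir}/tmp");        // value contains a variable
      System.out.println(conf.get("tempdir"));      // /home/user/work/tmp (expanded at get() time)

      conf.set("otherdir", "${env.BASE_DIR:-/tmp}/other");
      System.out.println(conf.get("otherdir"));     // /tmp/other if BASE_DIR is undefined or empty
    }
  }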
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
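A hedged sketch of the KeyProvider lifecycle described above; it assumes at least one provider is configured via hadoop.security.key.provider.path and that the key name is free:

  import java.util.List;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.crypto.key.KeyProvider;
  import org.apache.hadoop.crypto.key.KeyProviderFactory;

  public class KeyProviderDemo {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      List<KeyProvider> providers = KeyProviderFactory.getProviders(conf);
      KeyProvider provider = providers.get(0);     // which provider is first depends on the deployment

      KeyProvider.KeyVersion v0 =
          provider.createKey("demo.key", KeyProvider.options(conf)); // key material generated for us
      KeyProvider.KeyVersion v1 = provider.rollNewVersion("demo.key");
      provider.flush();                                              // persist pending writes
      System.out.println(v0.getVersionName() + " -> " + v1.getVersionName());
    }
  }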
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+ default port;]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS metadata
+ synchronization is essential to guarantee consistency of read requests
+ particularly in HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
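A minimal sketch of modifying and reading an ACL through the FileSystem API; the path and user name are illustrative and the target file system must support ACLs (e.g. HDFS):

  import java.util.Collections;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.fs.permission.AclEntry;
  import org.apache.hadoop.fs.permission.AclEntryScope;
  import org.apache.hadoop.fs.permission.AclEntryType;
  import org.apache.hadoop.fs.permission.FsAction;

  public class AclDemo {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      Path dir = new Path("/tmp/acl-demo");
      AclEntry entry = new AclEntry.Builder()
          .setScope(AclEntryScope.ACCESS)
          .setType(AclEntryType.USER)
          .setName("alice")                        // hypothetical user
          .setPermission(FsAction.READ_EXECUTE)
          .build();
      fs.modifyAclEntries(dir, Collections.singletonList(entry)); // merged into the existing ACL
      System.out.println(fs.getAclStatus(dir));
    }
  }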
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
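A small sketch of the extended-attribute calls described above; the path and attribute name are illustrative and the file system must support xattrs:

  import java.nio.charset.StandardCharsets;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class XAttrDemo {
    public static void main(String[] args) throws Exception {
      FileSystem fs = FileSystem.get(new Configuration());
      Path file = new Path("/tmp/xattr-demo.txt");
      // Names are namespaced: user., trusted., system., security., raw.
      fs.setXAttr(file, "user.origin",
                  "ingest-job-42".getBytes(StandardCharsets.UTF_8));
      byte[] value = fs.getXAttr(file, "user.origin");
      System.out.println(new String(value, StandardCharsets.UTF_8));
      System.out.println(fs.listXAttrs(file));     // names only, no values
      fs.removeXAttr(file, "user.origin");
    }
  }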
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866, host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block groups. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that holding all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
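A hedged sketch of the CREATE|OVERWRITE combination described above, using FileContext; the path is illustrative:

  import java.util.EnumSet;
  import org.apache.hadoop.fs.CreateFlag;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Options;
  import org.apache.hadoop.fs.Path;

  public class CreateFlagDemo {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      // CREATE|OVERWRITE: create the file if absent, otherwise truncate and rewrite it.
      try (FSDataOutputStream out = fc.create(
              new Path("/tmp/create-demo/data.txt"),
              EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
              Options.CreateOpts.createParent())) {
        out.writeUTF("hello");
      }
    }
  }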
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - eg. localFS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
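A minimal sketch of truncate and its asynchronous return value, assuming an illustrative path on a file system that supports truncate:

  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Path;

  public class TruncateDemo {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      Path file = new Path("/tmp/truncate-demo.txt");
      boolean done = fc.truncate(file, 128L);   // keep only the first 128 bytes
      if (!done) {
        // The last block is still being adjusted in the background; wait for
        // the file length to settle before appending further data.
      }
    }
  }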
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
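A short sketch of rename with the OVERWRITE option; the paths are illustrative:

  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Options;
  import org.apache.hadoop.fs.Path;

  public class RenameDemo {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      // Without Rename.OVERWRITE this fails with FileAlreadyExistsException when
      // the destination exists; with it, a file or empty directory is replaced.
      fc.rename(new Path("/tmp/rename-demo/part-tmp"),
                new Path("/tmp/rename-demo/part-0000"),
                Options.Rename.OVERWRITE);
    }
  }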
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ <---X--->
+ fs://host/A/B/link
+ <-----Y----->
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
+ is "/file" then [X][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
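A hedged sketch of creating and resolving a symlink via FileContext; the paths are illustrative and the underlying file system must support (and enable) symlinks:

  import org.apache.hadoop.fs.FileContext;
  import org.apache.hadoop.fs.Path;

  public class SymlinkDemo {
    public static void main(String[] args) throws Exception {
      FileContext fc = FileContext.getFileContext();
      // Relative target: resolved against the directory holding the link.
      fc.createSymlink(new Path("../archive/2022"),
                       new Path("/data/current"),
                       true /* create missing parents of the link */);
      System.out.println(fc.getLinkTarget(new Path("/data/current")));
    }
  }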
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
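+ A minimal sketch for this example, in the style of Examples 1 and 2 and
+ assuming the static factory FileContext.getLocalFSFileContext():
+
+   myFContext = FileContext.getLocalFSFileContext(); // local file system as the default FS
+   myFContext.create(path, ...);
+   ...
+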
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If the there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @throws IOException if the FileSystem cannot be instantiated.]]>
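+
+ A short sketch of the caching behaviour described above; the scheme, host and
+ port in the URI are illustrative only:
+
+   Configuration conf = new Configuration();
+   // returns a cached instance for this URI and user unless caching is disabled
+   FileSystem cached = FileSystem.get(URI.create("hdfs://namenode:8020/"), conf);
+
+   conf.setBoolean("fs.hdfs.impl.disable.cache", true);  // per-scheme cache bypass
+   FileSystem fresh = FileSystem.get(URI.create("hdfs://namenode:8020/"), conf);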
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() <= s:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem, where the
+ hostnames of the machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ And if a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
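+
+ A sketch of the FileContext equivalent that user code is expected to migrate
+ to; src and dst are assumed to be existing Path values, and
+ Options.Rename.OVERWRITE enables the overwrite behaviour described above:
+
+   FileContext fc = FileContext.getFileContext();
+   // without OVERWRITE the rename fails if dst exists;
+   // with OVERWRITE it replaces a file or an empty directory
+   fc.rename(src, dst, Options.Rename.OVERWRITE);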
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
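+
+ A small usage sketch of the truncate contract described above, assuming an
+ existing FileSystem fs, Path f and long newLength:
+
+   boolean done = fs.truncate(f, newLength);
+   if (!done) {
+     // the last block is still being adjusted in the background;
+     // wait for it to complete before appending or rewriting the file
+   }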
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem depends on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and of the actual delete operation
+ is high (for example: object stores), the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
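+
+ A sketch, assuming an existing FileSystem fs; the path is illustrative:
+
+   Path scratch = new Path("/tmp/job-scratch");
+   fs.deleteOnExit(scratch);   // queued; deleted when the FileSystem is closed or the JVM exits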
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
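+
+ Because no ordering is guaranteed, callers that need a deterministic order
+ should sort the result themselves. A sketch using java.util.Arrays and
+ java.util.Comparator, assuming an existing FileSystem fs and Path dir:
+
+   FileStatus[] statuses = fs.listStatus(dir);
+   Arrays.sort(statuses, Comparator.comparing(s -> s.getPath().getName()));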
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the filter default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
+
?
+
Matches any single character.
+
+
+
*
+
Matches zero or more characters.
+
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
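+
+ A sketch of a glob lookup using the pattern syntax above, assuming an existing
+ FileSystem fs; the path pattern is illustrative:
+
+   FileStatus[] matches = fs.globStatus(new Path("/logs/2024-*/part-[0-9]*"));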
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, return files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
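+
+ A sketch of consuming the remote iterator returned for a recursive listing,
+ assuming an existing FileSystem fs; the path is illustrative:
+
+   RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
+   while (it.hasNext()) {
+     LocatedFileStatus status = it.next();
+     // block locations are already populated for each file
+     BlockLocation[] locations = status.getBlockLocations();
+   }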
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
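+
+ A sketch of a round trip through the extended-attribute API, assuming an
+ existing FileSystem fs and Path path; the attribute name and value are
+ illustrative, and StandardCharsets comes from java.nio.charset:
+
+   fs.setXAttr(path, "user.checksum", "example-value".getBytes(StandardCharsets.UTF_8));
+   byte[] stored = fs.getXAttr(path, "user.checksum");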
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase)
+ third-party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a static configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
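+
+ A sketch of the store/load round trip, assuming an existing Configuration
+ conf; the key name is illustrative and relies on the default
+ io.serializations setting, which includes WritableSerialization:
+
+   DefaultStringifier.store(conf, new IntWritable(42), "my.app.saved.value");
+   IntWritable restored = DefaultStringifier.load(conf, "my.app.saved.value", IntWritable.class);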
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more effective,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ how to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Attention: the classes defined in the getTypes() method must
+ implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
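+
+ A minimal sketch of the two steps above; the wrapped types are illustrative:
+
+   public class GenericObject extends GenericWritable {
+     @SuppressWarnings("unchecked")
+     private static final Class<? extends Writable>[] TYPES =
+         new Class[] { IntWritable.class, Text.class };
+
+     @Override
+     protected Class<? extends Writable>[] getTypes() {
+       return TYPES;
+     }
+   }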
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
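+
+ A sketch of creating a block-compressed writer through the recommended static
+ factory methods, assuming an existing Configuration conf; the file path, key
+ and value types are illustrative:
+
+   SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+       SequenceFile.Writer.file(new Path("/data/events.seq")),
+       SequenceFile.Writer.keyClass(LongWritable.class),
+       SequenceFile.Writer.valueClass(Text.class),
+       SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK));
+   writer.append(new LongWritable(1L), new Text("first record"));
+   writer.close();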
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting as position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOutput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements WritableComparable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Enum type
+ @param in DataInput to read from
+ @param enumType Class type of Enum
+ @return Enum represented by String read from DataInput
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ len number of bytes in input stream in
+ @param in input stream
+ @param len number of bytes to skip
+ @throws IOException when fewer bytes than requested could be skipped]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Compressor
+ @param conf the Configuration object which contains the settings for creating or reinitializing the compressor
+ @return Compressor for the given
+ CompressionCodec from the pool or a new one]]>
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Decompressor
+ @return Decompressor for the given
+ CompressionCodec from the pool or a new one]]>
+
+
+
+
+
+ Compressor to be returned to the pool]]>
+
+
+
+
+
+ Decompressor to be returned to the
+ pool]]>
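+
+ A sketch of borrowing and returning a pooled compressor; codec, conf and
+ rawOut are assumed to be an existing CompressionCodec, Configuration and
+ OutputStream respectively:
+
+   Compressor compressor = CodecPool.getCompressor(codec, conf);
+   try {
+     OutputStream compressed = codec.createOutputStream(rawOut, compressor);
+     // write through 'compressed' ...
+   } finally {
+     CodecPool.returnCompressor(compressor);   // always hand the compressor back to the pool
+   }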
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
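+
+ A sketch of both lookup styles, assuming an existing Configuration conf; the
+ file name is illustrative:
+
+   CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+   CompressionCodec byAlias = factory.getCodecByName("gzip");              // alias lookup
+   CompressionCodec bySuffix = factory.getCodec(new Path("data.txt.gz"));  // lookup by file suffix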
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks. The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values with a length less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB and 1MB for general usage.
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there are more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressible (by
+ compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" offers a slightly better
+ compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, compared to "lzo".
+
File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered; or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
+ byte[4]=(n>>8)&0xff; byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
+ byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
+ byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
+ byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
+ byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
+ byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
+ NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
+ (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
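+
+ A worked example of the rules above (the arithmetic is illustrative, not taken
+ from the source): n = 1000 falls in [-20*2^8, 20*2^8), so it is encoded in two
+ bytes as byte[0] = 1000/256 - 52 = -49 and byte[1] = 1000 & 0xff = 0xE8.
+ Decoding sees FB = -49 in [-72, -33] and recovers
+ (FB+52)<<8 + (NB[0]&0xff) = (3<<8) + 232 = 1000.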
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generate and update metrics information.
+
{@link MetricsSink} consume the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} poll the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated to final-sum and final-total
+ used to compute final average, which is called rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where the <serviceName> and <nameName> are the supplied parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where the <serviceName> and <nameName> are the supplied parameters.
+
+ @param serviceName
+ @param nameName
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
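+
+ A sketch, assuming an existing MBean object named myMBean; the service and
+ name strings are illustrative:
+
+   ObjectName registered = MBeans.register("NameNode", "NameNodeInfo", myMBean);
+   // later: MBeans.unregister(registered);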
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so can be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relaying a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mapping
+ and mapping]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /host@realm.
+ @param principalName principal name of format as described above
+ @return host name if the string conforms to the above format, else null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "jack"
+
+ @param userName
+ @return userName without login method]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method]]>
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method
+ @throws IOException if the action throws an IOException
+ @throws Error if the action throws an Error
+ @throws RuntimeException if the action throws a RuntimeException
+ @throws InterruptedException if the action throws an InterruptedException
+ @throws UndeclaredThrowableException if the action throws something else]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CredentialProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ does not provide the stack trace for security purposes.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A User-Agent String is considered to be a browser if it matches
+ any of the regex patterns from browser-useragent-regex; the default
+ behavior is to consider everything a browser that matches the following:
+ "^Mozilla.*,^Opera.*". Subclasses can optionally override
+ this method to use different behavior.
+
+ @param userAgent The User-Agent String, or null if there isn't one
+ @return true if the User-Agent String refers to a browser, false if not]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The type of the token identifier]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ T extends TokenIdentifier]]>
+
+
+
+
+
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
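+
+ A sketch of typical client usage, based only on the constructors and the
+ openConnection method documented above; the target URL is a placeholder:
+
+ DelegationTokenAuthenticatedURL.Token token =
+     new DelegationTokenAuthenticatedURL.Token();
+ DelegationTokenAuthenticatedURL authUrl =
+     new DelegationTokenAuthenticatedURL();
+ HttpURLConnection conn =
+     authUrl.openConnection(new URL("http://host:8080/service"), token);
+ // read the response, then reuse the same token for follow-up requests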
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication.]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ live.
+ @return a (snapshotted) map of blocker name->description values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Do nothing if the service is null or not
+ in a state in which it can be/needs to be stopped.
+
+ The service state is checked before the operation begins.
+ This process is not thread safe.
+ @param service a service or null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit option.]]>
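+
+ A rough sketch of a launchable service following the lifecycle described
+ above; the class name is hypothetical, error handling is omitted, and the
+ service base class is used only for brevity:
+
+ import java.util.List;
+ import org.apache.hadoop.conf.Configuration;
+
+ public class MyLaunchableService extends AbstractService
+     implements LaunchableService {
+
+   public MyLaunchableService() {
+     super("MyLaunchableService");
+   }
+
+   @Override
+   public Configuration bindArgs(Configuration config, List<String> args)
+       throws Exception {
+     // inspect the remaining command-line arguments here; the returned
+     // configuration is the one later passed to init()
+     return config;
+   }
+
+   @Override
+   public int execute() throws Exception {
+     // do the actual work; the return value becomes the process exit code
+     return 0;
+   }
+ }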
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte, e.g. 44, "not found" is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time-out the operation.]]>
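+
+ A minimal illustration of the reporting pattern described above; the
+ processItem method and the items collection are placeholders:
+
+ void processAll(List<Item> items, Progressable progress) {
+   for (Item item : items) {
+     processItem(item);    // potentially slow work
+     progress.progress();  // tell the framework we are still alive
+   }
+ }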
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which the script should be marked as timed out
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
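+
+ A sketch of the convenience methods documented above; the command, the
+ environment entry and the timeout value are arbitrary examples:
+
+ String listing = Shell.execCommand("ls", "-l", "/tmp");
+
+ Map<String, String> env = new HashMap<>();
+ env.put("LC_ALL", "C");
+ String out =
+     Shell.execCommand(env, new String[] {"ls", "-l", "/tmp"}, 10000L);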
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe.]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and runs all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
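+
+ A sketch of registering a hook with the timeout-aware overload described
+ above; the priority, timeout and cleanup action are example values only:
+
+ ShutdownHookManager.get().addShutdownHook(
+     () -> cleanUpTemporaryState(),  // hypothetical cleanup action
+     10,                             // priority: higher runs first
+     30, TimeUnit.SECONDS);          // per-hook shutdown budget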
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool, is the standard for any Map-Reduce tool/application.
+ The tool/application should delegate the handling of
+
+ standard command-line options to {@link ToolRunner#run(Tool, String[])}
+ and only handle its custom arguments.
+
+
Here is how a typical Tool is implemented:
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
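+
+ A sketch of how deletion and approximate counting combine, reusing the
+ (arbitrary) sizing from the Bloom filter example above:
+
+ CountingBloomFilter cbf =
+     new CountingBloomFilter(1 << 20, 5, Hash.MURMUR_HASH);
+ Key k = new Key("alice".getBytes());
+ cbf.add(k);
+ cbf.add(k);
+ int approx = cbf.approximateCount(k);  // expected to be >= 2
+ cbf.delete(k);                         // safe: the key is present in the filter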
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr <= n (n is
+ the cardinality of the set A to record in the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml
new file mode 100644
index 0000000000000..62a0e09f121af
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml
@@ -0,0 +1,39037 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @param customMessage deprecation message
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKey key that take up the values of deprecated key
+ @param customMessage deprecation message]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ As a side effect get loads the properties from the sources if called for
+ the first time as a lazy init.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion. If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value
+ @param enumeration type]]>
+
+
+
+
+
+
+ enumeration type
+ @throws IllegalArgumentException If mapping is illegal for the type
+ provided
+ @return enumeration type]]>
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param out the writer to write to.]]>
+
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
+ core-default.xml: Read-only defaults for hadoop.
+ core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
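+
+ A sketch of typical programmatic use; the resource name and property names
+ below are examples only:
+
+ Configuration conf = new Configuration();   // loads core-default.xml, core-site.xml
+ conf.addResource("my-site.xml");            // looked up on the classpath
+ conf.set("my.example.dir", "/tmp/example"); // override, unless marked final
+ String dir = conf.get("my.example.dir");
+ int threads = conf.getInt("my.example.threads", 4);  // default when unset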
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications, must
+ include entries for user, group, and others for compatibility with
+ permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866", "host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block group. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes holding all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ This does not change the current offset of a file, and is thread-safe.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if reached
+ end-of-stream
+ @throws IOException if there is some error performing the read]]>
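+
+ A minimal usage sketch of the contract above, assuming a Hadoop release in
+ which FSDataInputStream exposes the ByteBuffer positioned read and a
+ hypothetical input path passed as the first command-line argument; the
+ capability is probed first because only some streams (notably HDFS input
+ streams) implement it.
+
+ import java.io.IOException;
+ import java.nio.ByteBuffer;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FSDataInputStream;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.fs.StreamCapabilities;
+
+ public class PositionedByteBufferReadExample {
+   public static void main(String[] args) throws IOException {
+     Configuration conf = new Configuration();
+     Path file = new Path(args[0]);                  // hypothetical input path
+     try (FileSystem fs = FileSystem.newInstance(file.toUri(), conf);
+          FSDataInputStream in = fs.open(file)) {
+       // Probe the stream before attempting the ByteBuffer positioned read.
+       if (!in.hasCapability(StreamCapabilities.PREADBYTEBUFFER)) {
+         System.out.println("stream does not support ByteBuffer pread");
+         return;
+       }
+       ByteBuffer buf = ByteBuffer.allocate(4096);
+       // Read up to 4096 bytes starting at offset 1024; the stream's own
+       // file offset is left untouched by this call.
+       int read = in.read(1024L, buf);
+       System.out.println("bytes read: " + read);
+     }
+   }
+ }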
+
+
+
+
+
+
+
+
+ This operation provides similar semantics to
+ {@link #read(long, ByteBuffer)}, the difference is that this method is
+ guaranteed to read data until the {@link ByteBuffer} is full, or until
+ the end of the data stream is reached.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @throws IOException if there is some error performing the read
+ @throws EOFException the end of the data was reached before
+ the read operation completed
+ @see #read(long, ByteBuffer)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ The following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
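+
+ A short sketch of how these flags are typically combined, assuming the
+ default file system and a hypothetical path; CREATE|OVERWRITE creates the
+ file if absent and truncates it otherwise, matching the semantics above.
+
+ import java.io.IOException;
+ import java.util.EnumSet;
+
+ import org.apache.hadoop.fs.CreateFlag;
+ import org.apache.hadoop.fs.FSDataOutputStream;
+ import org.apache.hadoop.fs.FileContext;
+ import org.apache.hadoop.fs.Options;
+ import org.apache.hadoop.fs.Path;
+
+ public class CreateFlagExample {
+   public static void main(String[] args) throws IOException {
+     FileContext fc = FileContext.getFileContext();      // default FS
+     Path path = new Path("/tmp/createflag-demo.txt");   // hypothetical path
+     // CREATE|OVERWRITE: create the file if it does not exist,
+     // otherwise truncate the existing file.
+     try (FSDataOutputStream out =
+              fc.create(path,
+                        EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
+                        Options.CreateOpts.createParent())) {
+       out.writeUTF("hello");
+     }
+   }
+ }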
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are the server-side defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - e.g. localFS ignores Blocksize,
+ replication, and checksum.
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ {@literal <---}X{@literal --->}
+ fs://host/A/B/link
+ {@literal <-----}Y{@literal ----->}
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may be:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
+ is "/file" then [X'][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
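+
+ A sketch of the API described above, with hypothetical target and link
+ paths. Note that symbolic links are only supported by some file systems
+ and are disabled by default in recent Hadoop releases, so this assumes a
+ deployment where they are enabled.
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.fs.FileContext;
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.Path;
+
+ public class SymlinkExample {
+   public static void main(String[] args) throws IOException {
+     FileContext fc = FileContext.getFileContext();
+     Path target = new Path("/user/alice/data/file1");   // hypothetical target
+     Path link = new Path("/user/alice/links/file1");    // hypothetical link
+     // Create the link, creating missing parents of the link path.
+     fc.createSymlink(target, link, true);
+     // getFileLinkStatus does not follow the link, so it reports the link.
+     FileStatus linkStatus = fc.getFileLinkStatus(link);
+     System.out.println("is symlink: " + linkStatus.isSymlink());
+     System.out.println("points to : " + fc.getLinkTarget(link));
+   }
+ }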
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries
+ to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications, must include entries for user, group, and others for
+ compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns
+ each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the
+ file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @throws IOException if the FileSystem cannot be instantiated.]]>
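+
+ A sketch of the caching behaviour described above, assuming a hypothetical
+ hdfs://namenode:8020/ cluster; the per-scheme property name follows the
+ "fs.$SCHEME.impl.disable.cache" pattern quoted above.
+
+ import java.io.IOException;
+ import java.net.URI;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+
+ public class FsCacheExample {
+   public static void main(String[] args) throws IOException {
+     URI uri = URI.create("hdfs://namenode:8020/");   // hypothetical cluster
+     Configuration conf = new Configuration();
+
+     // Cached: repeated calls with the same URI and user return one object.
+     FileSystem cached1 = FileSystem.get(uri, conf);
+     FileSystem cached2 = FileSystem.get(uri, conf);
+     System.out.println("same instance: " + (cached1 == cached2));
+
+     // Disabling the cache for the scheme makes get() return a fresh,
+     // caller-owned instance, which must therefore be closed by the caller.
+     conf.setBoolean("fs.hdfs.impl.disable.cache", true);
+     try (FileSystem uncached = FileSystem.get(uri, conf)) {
+       System.out.println("fresh instance: " + (uncached != cached1));
+     }
+   }
+ }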
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() {@literal <=} start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ And if a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
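+
+ A sketch of how a caller might interpret the boolean result, with the file
+ and target length taken from hypothetical command-line arguments; truncate
+ is unsupported by many file systems, so this assumes one (such as HDFS)
+ that implements it.
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+
+ public class TruncateExample {
+   public static void main(String[] args) throws IOException {
+     Configuration conf = new Configuration();
+     Path file = new Path(args[0]);            // hypothetical file
+     long newLength = Long.parseLong(args[1]); // hypothetical target length
+     try (FileSystem fs = FileSystem.newInstance(file.toUri(), conf)) {
+       boolean done = fs.truncate(file, newLength);
+       if (done) {
+         // The file was truncated in place and can be appended to at once.
+         System.out.println("truncated to " + newLength + " bytes");
+       } else {
+         // A background adjustment of the last block is in progress; the
+         // caller should wait (for example by retrying an append) before
+         // making further updates to the file.
+         System.out.println("truncate started; last-block recovery pending");
+       }
+     }
+   }
+ }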
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem will depend on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the list of file/directory statuses in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given patch
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the list of file/directory statuses in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the list of file/directory statuses in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the filter default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the list of file/directory statuses in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
?
+
Matches any single character.
+
+
+
*
+
Matches zero or more characters.
+
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
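+
+ A sketch of a glob lookup over a hypothetical /logs/2024-*/*.gz layout on
+ the default file system; globStatus may return null when no directory
+ component of the pattern exists, so the result is checked before use.
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileStatus;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+
+ public class GlobExample {
+   public static void main(String[] args) throws IOException {
+     FileSystem fs = FileSystem.get(new Configuration()); // default FS
+     Path pattern = new Path("/logs/2024-*/*.gz");        // hypothetical glob
+     FileStatus[] matches = fs.globStatus(pattern);
+     if (matches == null) {
+       System.out.println("no directory component of the pattern exists");
+       return;
+     }
+     for (FileStatus status : matches) {   // results are sorted by name
+       System.out.println(status.getPath() + "\t" + status.getLen());
+     }
+   }
+ }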
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, return files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
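+
+ A sketch of a recursive listing with the RemoteIterator returned by
+ listFiles, using a hypothetical root directory; only files are returned,
+ and each status carries its block locations.
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.LocatedFileStatus;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.fs.RemoteIterator;
+
+ public class ListFilesExample {
+   public static void main(String[] args) throws IOException {
+     FileSystem fs = FileSystem.get(new Configuration());
+     Path root = new Path("/data/input");       // hypothetical root directory
+     long totalBytes = 0;
+     RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true);
+     while (it.hasNext()) {                     // statuses fetched lazily
+       LocatedFileStatus status = it.next();
+       totalBytes += status.getLen();
+       System.out.println(status.getPath() + " blocks="
+           + status.getBlockLocations().length);
+     }
+     System.out.println("total bytes: " + totalBytes);
+   }
+ }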
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations, such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal } of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and the distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and, outside the Apache Hadoop codebase,
+ third-party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+ Consult the filesystem specification document for the requirements
+ of an implementation of this interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Exceptions are caught and downgraded to debug logging.
+ @param source source of statistics.
+ @return a string for logging.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is designed to be affordable to use in log statements.
+ @param source source of statistics -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is for use in log statements where the cost of creating
+ this entry is low; it is affordable to use in log statements.
+ @param statistics statistics to stringify -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It is serializable so that frameworks which can use java serialization
+ to propagate data (Spark, Flink...) can send the statistics
+ back. For this reason, TreeMaps are explicitly used as field types,
+ even though IDEs can recommend use of Map instead.
+ For security reasons, untrusted java object streams should never be
+ deserialized. If for some reason this is required, use
+ {@link #requiredSerializationClasses()} to get the list of classes
+ used when deserializing instances of this object.
+
+
+ It is annotated for correct serialization with jackson2.
+
]]>
+
+
+
+
+
+
+
+
+
+ This is not an atomic option.
+
+ The instance can be serialized, and its
+ {@code toString()} method lists all the values.
+ @param statistics statistics
+ @return a snapshot of the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used to accrue values so as to dynamically update
+ the mean. If so, know that there is no synchronization
+ on the methods.
+
+
+ If a statistic has 0 samples then it is considered to be empty.
+
+
+ All 'empty' statistics are equivalent, independent of the sum value.
+
+
+ For non-empty statistics, sum and sample values must match
+ for equality.
+
+
+ It is serializable and annotated for correct serialization with jackson2.
+
+
+ Thread safety. The operations to add/copy sample data, are thread safe.
+
+
+
{@link #add(MeanStatistic)}
+
{@link #addSample(long)}
+
{@link #clear()}
+
{@link #setSamplesAndSum(long, long)}
+
{@link #set(MeanStatistic)}
+
{@link #setSamples(long)} and {@link #setSum(long)}
+
+
+ So is the {@link #mean()} method. This ensures that when
+ used to aggregate statistics, the aggregate value and sample
+ count are set and evaluated consistently.
+
+
+ Other methods are marked as synchronized because Findbugs overreacts
+ to the idea that some operations to update sum and sample count
+ are synchronized, but that things like equals are not.
+
+ The names of the constants are uppercase, with words separated by
+ underscores.
+
+
+ The values of the constants are the lowercase forms of the constant names.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a statically configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
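+
+ A sketch of the store/load round trip described above, using a Text value
+ and a hypothetical configuration key; it relies on a serialization being
+ registered for the stored class (WritableSerialization is available by
+ default).
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.io.DefaultStringifier;
+ import org.apache.hadoop.io.Text;
+
+ public class StringifierExample {
+   public static void main(String[] args) throws IOException {
+     Configuration conf = new Configuration();
+     // Store a Writable under a configuration key as an encoded string.
+     Text original = new Text("payload carried through the Configuration");
+     DefaultStringifier.store(conf, original, "example.stringified.text");
+     // Elsewhere (for example in a task), load it back with its class.
+     Text restored =
+         DefaultStringifier.load(conf, "example.stringified.text", Text.class);
+     System.out.println(restored);
+   }
+ }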
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ How to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Attention: the classes defined in the getTypes() method must
+ implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
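+
+ A minimal subclass following the two steps above; the class name
+ GenericObject and the wrapped types are illustrative only.
+
+ import org.apache.hadoop.io.GenericWritable;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.io.Writable;
+
+ public class GenericObject extends GenericWritable {
+
+   // Every class listed here must itself implement Writable.
+   @SuppressWarnings("unchecked")
+   private static final Class<? extends Writable>[] TYPES =
+       new Class[] { IntWritable.class, Text.class };
+
+   // No-arg constructor required for deserialization.
+   public GenericObject() {
+   }
+
+   public GenericObject(Writable instance) {
+     set(instance);   // wrap a value of one of the declared types
+   }
+
+   @Override
+   protected Class<? extends Writable>[] getTypes() {
+     return TYPES;
+   }
+ }
+
+ A GenericObject can then be used, for example, as the reduce value type
+ when a job mixes IntWritable and Text values.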
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
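+
+ A sketch of writing and reading a small record-compressed SequenceFile via
+ the static createWriter factory, with a hypothetical output path; the
+ reader handles all three formats transparently.
+
+ import java.io.IOException;
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.SequenceFile;
+ import org.apache.hadoop.io.Text;
+
+ public class SequenceFileExample {
+   public static void main(String[] args) throws IOException {
+     Configuration conf = new Configuration();
+     Path file = new Path("/tmp/demo.seq");    // hypothetical output file
+
+     // The requested CompressionType selects between the three formats.
+     try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+         SequenceFile.Writer.file(file),
+         SequenceFile.Writer.keyClass(IntWritable.class),
+         SequenceFile.Writer.valueClass(Text.class),
+         SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD))) {
+       for (int i = 0; i < 3; i++) {
+         writer.append(new IntWritable(i), new Text("value-" + i));
+       }
+     }
+
+     // Read the pairs back; next() fills the reusable key/value instances.
+     try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
+         SequenceFile.Reader.file(file))) {
+       IntWritable key = new IntWritable();
+       Text value = new Text();
+       while (reader.next(key, value)) {
+         System.out.println(key + "\t" + value);
+       }
+     }
+   }
+ }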
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting as position start. The starting
+ buffer, starting at position start. The starting
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOutput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
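+
+ As a brief illustrative sketch (not part of the interface contract), the
+ MyWritable example above can be round-tripped through an in-memory stream;
+ the stream wiring shown is ordinary java.io usage:
+
+   ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+   MyWritable original = new MyWritable();
+   original.write(new DataOutputStream(bytes));              // serialize
+
+   DataInput in = new DataInputStream(
+       new ByteArrayInputStream(bytes.toByteArray()));
+   MyWritable copy = MyWritable.read(in);                     // deserialize
+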
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements
+ WritableComparable{@literal <MyWritableComparable>} {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.counter;
+ int thatValue = o.counter;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec: the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec: the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
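+
+ As a hedged sketch (assuming these methods belong to CompressionCodecFactory,
+ which is where Hadoop exposes alias lookup), a codec can be resolved by alias:
+
+   Configuration conf = new Configuration();
+   CompressionCodecFactory factory = new CompressionCodecFactory(conf);
+   CompressionCodec gzip = factory.getCodecByName("gzip");        // by alias
+   Class<? extends CompressionCodec> cls =
+       factory.getCodecClassByName("gzipcodec");                  // class only
+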
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
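+
+ A minimal sketch of the call sequence implied by the methods above for a
+ one-shot compression; codec, data and out are assumed to exist, and the
+ buffer size is arbitrary:
+
+   Compressor compressor = codec.createCompressor();
+   compressor.setInput(data, 0, data.length);
+   compressor.finish();                          // no more input will follow
+   byte[] buffer = new byte[64 * 1024];
+   while (!compressor.finished()) {
+     int n = compressor.compress(buffer, 0, buffer.length);
+     out.write(buffer, 0, n);                    // emit compressed bytes
+   }
+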
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
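+
+ For the simple single-stream case (ignoring concatenated streams), a hedged
+ sketch of the corresponding decompression loop, with codec, compressed and
+ out assumed to exist:
+
+   Decompressor decompressor = codec.createDecompressor();
+   decompressor.setInput(compressed, 0, compressed.length);
+   byte[] buffer = new byte[64 * 1024];
+   while (!decompressor.finished()) {
+     int n = decompressor.decompress(buffer, 0, buffer.length);
+     out.write(buffer, 0, n);                    // emit decompressed bytes
+   }
+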
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec
+ io.compress.passthrough.extension = .gz
+
+
+ Note: this is not a Splittable codec: it doesn't know the
+ capabilities of the passed in stream. It should be possible to
+ extend this in a subclass: the inner classes are marked as protected
+ to enable this. Do not retrofit splitting to this class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks. The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Defaults
+ to 1MB. Values with a length less than the chunk size are guaranteed to have
+ a known value length at read time (see
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Defaults to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Defaults to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a minimum block size between
+ 256KB and 1MB for general usage. A larger block size is preferred if files are
+ primarily for sequential access. However, it leads to inefficient random
+ access (because there is more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which has
+ been shown to be much faster than the positioned-read call in single-threaded mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressible (by
+ compressible, we mean a compression ratio of at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" offers a slightly better
+ compression ratio than "lzo" but requires 4x the CPU to compress and 2x the CPU to
+ decompress, compared to "lzo".
+
File system buffering. If the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered, or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in the TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
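+
+ As a small sketch, the variables above can be set programmatically before
+ creating a TFile.Writer or TFile.Reader (the values shown are simply the
+ documented defaults):
+
+   Configuration conf = new Configuration();
+   conf.setInt("tfile.io.chunk.size", 1024 * 1024);          // 1MB value chunks
+   conf.setInt("tfile.fs.output.buffer.size", 256 * 1024);   // writer buffering
+   conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);    // reader buffering
+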
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff;
+ byte[3]=n&0xff.
+ Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff;
+ byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff;
+ byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
+ byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff;
+ byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
+ byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff;
+ byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]=
+ (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff;
+ byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff;
+ byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=
+ (n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 +
+ (NB[0]&0xff)<<8 + NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)
+ <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
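+
+ Assuming this documents the TFile Utils class, a minimal sketch of writing
+ and reading back a value with this encoding (1000 falls in the two-byte
+ range [-20*2^8, 20*2^8)):
+
+   ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+   Utils.writeVLong(new DataOutputStream(bytes), 1000L);     // two-byte encoding
+   DataInput in = new DataInputStream(
+       new ByteArrayInputStream(bytes.toByteArray()));
+   long n = Utils.readVLong(in);                             // n == 1000
+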
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param <T>
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
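+
+As an illustrative sketch, the property can also be set programmatically; the
+class names listed are assumed to be the bundled Writable and Java
+serializations:
+
+  Configuration conf = new Configuration();
+  conf.set("io.serializations",
+      "org.apache.hadoop.io.serializer.WritableSerialization,"
+      + "org.apache.hadoop.io.serializer.JavaSerialization");
+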
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generate and update metrics information.
+
{@link MetricsSink} consume the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} poll the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
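+
+ A minimal, hypothetical {@link MetricsSource} illustrating the contract
+ described above; the record and gauge names are arbitrary examples:
+
+   public class MyMetricsSource implements MetricsSource {
+     private volatile long requestCount;
+
+     @Override
+     public void getMetrics(MetricsCollector collector, boolean all) {
+       collector.addRecord("MySource")
+           .addGauge(Interns.info("RequestCount", "Requests seen"),
+               requestCount);
+     }
+   }
+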
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated to a final sum and final total,
+ which are used to compute the final average, called the rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,name=}"
+ Where the {@literal and } are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ ,name=}"
+ Where the {@literal and } are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
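+
+ As a sketch, the mapping might be enabled as follows; the file path is an
+ example, and the impl key shown is assumed to be the standard topology
+ mapping property:
+
+   Configuration conf = new Configuration();
+   conf.set("net.topology.node.switch.mapping.impl",
+       "org.apache.hadoop.net.TableMapping");
+   conf.set("net.topology.table.file.name", "/etc/hadoop/topology.table");
+
+ with a mapping file containing whitespace-separated lines such as:
+
+   192.168.10.11   /dc1/rack1
+   192.168.10.12   /dc1/rack2
+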
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
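+
+ A minimal sketch of issuing an authenticated request with this class; the
+ URL is illustrative and error handling is omitted:
+
+   DelegationTokenAuthenticatedURL.Token token =
+       new DelegationTokenAuthenticatedURL.Token();
+   DelegationTokenAuthenticatedURL authUrl =
+       new DelegationTokenAuthenticatedURL();
+   HttpURLConnection conn = authUrl.openConnection(
+       new URL("http://host:14000/webhdfs/v1/"), token);
+   // read the response, then reuse the same token for follow-up calls
+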
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication.]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit option.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte. e.g. 44, "not found", is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+ @param exitCode exit code
+ @param cause inner cause
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will be 0 until a call
+ to {@link #finished()} has been made.
+ @return the currently recorded duration.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time-out the operation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which script should be marked timeout
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread-safe.]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
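+
+ For example, a hedged sketch of running a command and capturing its output
+ (output parsing is left to the caller):
+
+   // run "df -h /" and capture stdout as a single String
+   String output = Shell.execCommand("df", "-h", "/");
+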
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and runs all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
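+
+ A small sketch of registering a prioritized hook; the priority and timeout
+ values are arbitrary and cleanupResources() is a hypothetical routine:
+
+   ShutdownHookManager.get().addShutdownHook(
+       () -> cleanupResources(),          // hypothetical cleanup work
+       10,                                // higher priority runs earlier
+       30, TimeUnit.SECONDS);             // per-hook timeout
+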
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
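+
+ A minimal, illustrative Tool whose configuration is populated by ToolRunner
+ from the generic options:
+
+   public class MyTool extends Configured implements Tool {
+     @Override
+     public int run(String[] args) throws Exception {
+       Configuration conf = getConf();    // already holds the generic options
+       // ... application logic ...
+       return 0;
+     }
+
+     public static void main(String[] args) throws Exception {
+       System.exit(ToolRunner.run(new MyTool(), args));
+     }
+   }
+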
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
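+
+ A short usage sketch; the vector size, hash count and hash type are arbitrary
+ example values:
+
+   BloomFilter filter = new BloomFilter(1024, 5, Hash.MURMUR_HASH);
+   filter.add(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+   boolean maybePresent =
+       filter.membershipTest(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+   // true here; false positives are possible, false negatives are not
+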
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr {@literal <=} n
+ (n is the cardinality of the set A to record in
+ the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positive.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positive.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown
+ @throws TimeoutException the future timed out.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+
If it is an IOE: Return.
+
If it is a {@link UncheckedIOException}: return the cause
+
Completion/Execution Exceptions: extract and repeat
+
If it is an RTE or Error: throw.
+
Any other type: wrap in an IOE
+
+
+ Recursively handles wrapped Execution and Completion Exceptions in
+ case something very complicated has happened.
+ @param e exception.
+ @return an IOException extracted or built from the cause.
+ @throws RuntimeException if that is the inner cause.
+ @throws Error if that is the inner cause.]]>
+
+
+
+
+ Contains methods promoted from
+ {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they
+ are a key part of integrating async IO in application code.
+
+
+ One key feature is that the {@link #awaitFuture(Future)} and
+ {@link #awaitFuture(Future, long, TimeUnit)} calls will
+ extract and rethrow exceptions raised in the future's execution,
+ including extracting the inner IOException of any
+ {@code UncheckedIOException} raised in the future.
+ This makes it somewhat easier to execute IOException-raising
+ code inside futures.
+
]]>
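+
+ A hedged sketch of the intended pattern with the openFile() builder API
+ (path and conf are assumed to exist):
+
+   FileSystem fs = path.getFileSystem(conf);
+   CompletableFuture<FSDataInputStream> future = fs.openFile(path).build();
+   try (FSDataInputStream in = FutureIO.awaitFuture(future)) {
+     // read from the stream; exceptions raised in the future are rethrown here
+   }
+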
+
+
+
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @param mapper transformation
+ @return a remote iterator]]>
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @return a remote iterator]]>
+
+
+
+
+
+
+
+ Elements are filtered in the hasNext() method; if not used
+ the filtering will be done on demand in the {@code next()}
+ call.
+ @param type
+ @param iterator source
+ @param filter filter
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type.
+ @return a new iterator]]>
+
+
+
+
+
+
+ type
+ @return a list of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ type
+ @return an array of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+
+ If the iterator is an IOStatisticsSource returning a non-null
+ set of statistics, and this class's log is set to DEBUG,
+ then the statistics of the operation are evaluated and logged at
+ debug.
+
+ The number of entries processed is returned, as it is useful to
+ know this, especially during tests or when reporting values
+ to users.
+
+ This does not close the iterator afterwards.
+ @param source iterator source
+ @param consumer consumer of the values.
+ @return the number of elements processed
+ @param type of source
+ @throws IOException if the source RemoteIterator or the consumer raise one.]]>
+
+
+
+
+
+ type of source]]>
+
+
+
+
+ This aims to make it straightforward to use lambda-expressions to
+ transform the results of an iterator, without losing the statistics
+ in the process, and to chain the operations together.
+
+ The closeable operation will be passed through RemoteIterators which
+ wrap other RemoteIterators. This is to support any iterator which
+ can be closed to release held connections, file handles etc.
+ Unless client code is written to assume that RemoteIterator instances
+ may be closed, this is not likely to be broadly used. It is added
+ to make it possible to adopt this feature in a managed way.
+
+ One notable feature is that the
+ {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will
+ LOG at debug any IOStatistics provided by the iterator, if such
+ statistics are provided. There's no attempt at retrieval and logging
+ if the LOG is not set to debug, so it is a zero cost feature unless
+ the logger {@code org.apache.hadoop.fs.functional.RemoteIterators}
+ is at DEBUG.
+
+ Based on the S3A Listing code, and some work on moving other code
+ to using iterative listings so as to pick up the statistics.]]>
+
+
+
+
+
+
+
+
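As an illustration only (not part of this patch), a small sketch of combining the mapping and foreach helpers described above; the class name and lambda are invented.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import static org.apache.hadoop.util.functional.RemoteIterators.foreach;
import static org.apache.hadoop.util.functional.RemoteIterators.mappingRemoteIterator;

public class RemoteIteratorsSketch {
  /** Map a listing to paths and print them, returning the entry count. */
  public static long printPaths(RemoteIterator<FileStatus> statuses)
      throws IOException {
    RemoteIterator<Path> paths =
        mappingRemoteIterator(statuses, FileStatus::getPath);
    // foreach() logs any IOStatistics of the source at DEBUG and returns
    // the number of entries processed.
    return foreach(paths, path -> System.out.println(path));
  }
}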
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index d8e2dd3542223..5b5ffe1b00641 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -841,6 +841,36 @@
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+ 1.8
+
+
+ validate
+
+ run
+
+
+ true
+
+
+
+
+
+
+
+
+ Skip platform toolset version detection = ${skip.platformToolsetDetection}
+
+
+
+
+ org.codehaus.mojoexec-maven-plugin
@@ -852,6 +882,7 @@
exec
+ ${skip.platformToolsetDetection}${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd${basedir}\src\main\winutils
@@ -866,6 +897,7 @@
exec
+ ${skip.platformToolsetDetection}msbuild${basedir}/src/main/winutils/winutils.sln
@@ -878,6 +910,27 @@
+
+ compile-ms-winutils-using-build-tools
+ compile
+
+ exec
+
+
+ ${skip.platformToolsetDetection.negated}
+ msbuild
+
+ ${basedir}/src/main/winutils/winutils.sln
+ /nologo
+ /p:Configuration=Release
+ /p:OutDir=${project.build.directory}/bin/
+ /p:IntermediateOutputPath=${project.build.directory}/winutils/
+ /p:WsceConfigDir=${wsce.config.dir}
+ /p:WsceConfigFile=${wsce.config.file}
+ /p:PlatformToolset=${use.platformToolsetVersion}
+
+
+ convert-ms-native-dllgenerate-sources
@@ -885,6 +938,7 @@
exec
+ ${skip.platformToolsetDetection}${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd${basedir}\src\main\native
@@ -899,6 +953,35 @@
exec
+ ${skip.platformToolsetDetection}
+ msbuild
+
+ ${basedir}/src/main/native/native.sln
+ /nologo
+ /p:Configuration=Release
+ /p:OutDir=${project.build.directory}/bin/
+ /p:CustomZstdPrefix=${zstd.prefix}
+ /p:CustomZstdLib=${zstd.lib}
+ /p:CustomZstdInclude=${zstd.include}
+ /p:RequireZstd=${require.zstd}
+ /p:CustomOpensslPrefix=${openssl.prefix}
+ /p:CustomOpensslLib=${openssl.lib}
+ /p:CustomOpensslInclude=${openssl.include}
+ /p:RequireOpenssl=${require.openssl}
+ /p:RequireIsal=${require.isal}
+ /p:CustomIsalPrefix=${isal.prefix}
+ /p:CustomIsalLib=${isal.lib}
+
+
+
+
+ compile-ms-native-dll-using-build-tools
+ compile
+
+ exec
+
+
+ ${skip.platformToolsetDetection.negated}msbuild${basedir}/src/main/native/native.sln
@@ -916,6 +999,7 @@
/p:RequireIsal=${require.isal}/p:CustomIsalPrefix=${isal.prefix}/p:CustomIsalLib=${isal.lib}
+ /p:PlatformToolset=${use.platformToolsetVersion}
@@ -1151,7 +1235,7 @@
src-test-compile-protoc-legacygenerate-test-sources
- compile
+ test-compilefalse
@@ -1160,7 +1244,7 @@
com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
false
- ${basedir}/src/test/proto
+ ${basedir}/src/test/proto${project.build.directory}/generated-test-sources/javafalse
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
index 1cca9fe2bfdb1..4c7569d6ecd81 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
@@ -174,6 +174,7 @@ private static class ChecksumFSInputChecker extends FSInputChecker implements
private static final int HEADER_LENGTH = 8;
private int bytesPerSum = 1;
+ private long fileLen = -1L;
public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
throws IOException {
@@ -320,6 +321,18 @@ public static long findChecksumOffset(long dataOffset,
return HEADER_LENGTH + (dataOffset/bytesPerSum) * FSInputChecker.CHECKSUM_SIZE;
}
+ /**
+ * Calculate length of file if not already cached.
+ * @return file length.
+ * @throws IOException any IOE.
+ */
+ private long getFileLength() throws IOException {
+ if (fileLen == -1L) {
+ fileLen = fs.getFileStatus(file).getLen();
+ }
+ return fileLen;
+ }
+
/**
* Find the checksum ranges that correspond to the given data ranges.
* @param dataRanges the input data ranges, which are assumed to be sorted
@@ -371,13 +384,28 @@ static ByteBuffer checkBytes(ByteBuffer sumsBytes,
IntBuffer sums = sumsBytes.asIntBuffer();
sums.position(offset / FSInputChecker.CHECKSUM_SIZE);
ByteBuffer current = data.duplicate();
- int numChunks = data.remaining() / bytesPerSum;
+ int numFullChunks = data.remaining() / bytesPerSum;
+ boolean partialChunk = ((data.remaining() % bytesPerSum) != 0);
+ int totalChunks = numFullChunks;
+ if (partialChunk) {
+ totalChunks++;
+ }
CRC32 crc = new CRC32();
// check each chunk to ensure they match
- for(int c = 0; c < numChunks; ++c) {
- // set the buffer position and the limit
- current.limit((c + 1) * bytesPerSum);
+ for(int c = 0; c < totalChunks; ++c) {
+ // set the buffer position to the start of every chunk.
current.position(c * bytesPerSum);
+
+ if (c == numFullChunks) {
+ // During the last chunk there may be less than a full chunk of
+ // data present, so set the limit accordingly.
+ int lastIncompleteChunk = data.remaining() % bytesPerSum;
+ current.limit((c * bytesPerSum) + lastIncompleteChunk);
+ } else {
+ // set the buffer limit to end of every chunk.
+ current.limit((c + 1) * bytesPerSum);
+ }
+
// compute the crc
crc.reset();
crc.update(current);
@@ -396,11 +424,34 @@ static ByteBuffer checkBytes(ByteBuffer sumsBytes,
return data;
}
+ /**
+ * Validates range parameters.
+ * In case of CheckSum FS, we already have calculated
+ * fileLength so failing fast here.
+ * @param ranges requested ranges.
+ * @param fileLength length of file.
+ * @throws EOFException end of file exception.
+ */
+ private void validateRangeRequest(List<? extends FileRange> ranges,
+ final long fileLength) throws EOFException {
+ for (FileRange range : ranges) {
+ VectoredReadUtils.validateRangeRequest(range);
+ if (range.getOffset() + range.getLength() > fileLength) {
+ final String errMsg = String.format("Requested range [%d, %d) is beyond EOF for path %s",
+ range.getOffset(), range.getLength(), file);
+ LOG.warn(errMsg);
+ throw new EOFException(errMsg);
+ }
+ }
+ }
+
@Override
public void readVectored(List<? extends FileRange> ranges,
IntFunction<ByteBuffer> allocate) throws IOException {
+ final long length = getFileLength();
+ validateRangeRequest(ranges, length);
+
// If the stream doesn't have checksums, just delegate.
- VectoredReadUtils.validateVectoredReadRanges(ranges);
if (sums == null) {
datas.readVectored(ranges, allocate);
return;
@@ -410,15 +461,18 @@ public void readVectored(List extends FileRange> ranges,
List<CombinedFileRange> dataRanges =
VectoredReadUtils.mergeSortedRanges(Arrays.asList(sortRanges(ranges)), bytesPerSum,
minSeek, maxReadSizeForVectorReads());
+ // While merging the ranges above, they are rounded up based on the value of bytesPerSum,
+ // which can push some ranges past EOF; those ranges are trimmed here, otherwise the
+ // subsequent reads would fail with EOFException.
+ for (CombinedFileRange range : dataRanges) {
+ if (range.getOffset() + range.getLength() > length) {
+ range.setLength((int) (length - range.getOffset()));
+ }
+ }
List<CombinedFileRange> checksumRanges = findChecksumRanges(dataRanges,
bytesPerSum, minSeek, maxSize);
sums.readVectored(checksumRanges, allocate);
datas.readVectored(dataRanges, allocate);
- // Data read is correct. I have verified content of dataRanges.
- // There is some bug below here as test (testVectoredReadMultipleRanges)
- // is failing, should be
- // somewhere while slicing the merged data into smaller user ranges.
- // Spend some time figuring out but it is a complex code.
for(CombinedFileRange checksumRange: checksumRanges) {
for(FileRange dataRange: checksumRange.getUnderlying()) {
// when we have both the ranges, validate the checksum
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
index 16144206eeea4..9d6224366d1ba 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@@ -475,4 +475,21 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
* default hadoop temp dir on local system: {@value}.
*/
public static final String HADOOP_TMP_DIR = "hadoop.tmp.dir";
+
+ /**
+ * Thread-level IOStats Support.
+ * {@value}
+ */
+ public static final String IOSTATISTICS_THREAD_LEVEL_ENABLED =
+ "fs.iostatistics.thread.level.enabled";
+
+ /**
+ * Default value for Thread-level IOStats Support is true.
+ */
+ public static final boolean IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT =
+ true;
+
+ public static final String HADOOP_SECURITY_RESOLVER_IMPL =
+ "hadoop.security.resolver.impl";
+
}
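For illustration only (not part of this patch), the new constants would typically be read from a Configuration like this; the helper class and method names are invented.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;

public class ThreadIOStatsConfigSketch {
  /** Check whether thread-level IOStatistics capture is enabled. */
  public static boolean isThreadIOStatsEnabled(Configuration conf) {
    return conf.getBoolean(
        CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED,
        CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT);
  }
}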
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
index 6f6e30410659c..794855508c63f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
@@ -256,9 +256,8 @@ public void removeRenewAction(
try {
action.cancel();
} catch (InterruptedException ie) {
- LOG.error("Interrupted while canceling token for " + fs.getUri()
- + "filesystem");
- LOG.debug("Exception in removeRenewAction: {}", ie);
+ LOG.error("Interrupted while canceling token for {} filesystem.", fs.getUri());
+ LOG.debug("Exception in removeRenewAction.", ie);
}
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
index 52644402ca459..cca6c28da11a3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
@@ -144,7 +144,8 @@ public boolean seekToNewSource(long targetPos) throws IOException {
*
* @return the underlying input stream
*/
- @InterfaceAudience.LimitedPrivate({"HDFS"})
+ @InterfaceAudience.Public
+ @InterfaceStability.Stable
public InputStream getWrappedStream() {
return in;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
index f6c9d3c7cb0dd..774e015b37343 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
@@ -396,6 +396,10 @@ public Path getLocalPathForWrite(String pathStr, long size,
Context ctx = confChanged(conf);
int numDirs = ctx.localDirs.length;
int numDirsSearched = 0;
+ // Max capacity in any directory
+ long maxCapacity = 0;
+ String errorText = null;
+ IOException diskException = null;
//remove the leading slash from the path (to make sure that the uri
//resolution results in a valid path on the dir being checked)
if (pathStr.startsWith("/")) {
@@ -444,9 +448,18 @@ public Path getLocalPathForWrite(String pathStr, long size,
int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
while (numDirsSearched < numDirs) {
long capacity = ctx.dirDF[dirNum].getAvailable();
+ if (capacity > maxCapacity) {
+ maxCapacity = capacity;
+ }
if (capacity > size) {
- returnPath =
- createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
+ try {
+ returnPath = createPath(ctx.localDirs[dirNum], pathStr,
+ checkWrite);
+ } catch (IOException e) {
+ errorText = e.getMessage();
+ diskException = e;
+ LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
+ }
if (returnPath != null) {
ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
break;
@@ -462,8 +475,13 @@ public Path getLocalPathForWrite(String pathStr, long size,
}
//no path found
- throw new DiskErrorException("Could not find any valid local " +
- "directory for " + pathStr);
+ String newErrorText = "Could not find any valid local directory for " +
+ pathStr + " with requested size " + size +
+ " as the max capacity in any directory is " + maxCapacity;
+ if (errorText != null) {
+ newErrorText = newErrorText + " due to " + errorText;
+ }
+ throw new DiskErrorException(newErrorText, diskException);
}
/** Creates a file on the local FS. Pass size as
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
index de76090512705..7380402eb6156 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
@@ -114,6 +114,16 @@ default int maxReadSizeForVectorReads() {
* As a result of the call, each range will have FileRange.setData(CompletableFuture)
* called with a future that when complete will have a ByteBuffer with the
* data from the file's range.
+ *
+ * The position returned by getPos() after readVectored() is undefined.
+ *
+ *
+ * If a file is changed while the readVectored() operation is in progress, the output is
+ * undefined. Some ranges may have old data, some may have new and some may have both.
+ *
+ *
+ * While a readVectored() operation is in progress, normal read api calls may block.
+ *
* @param ranges the byte ranges to read
* @param allocate the function to allocate ByteBuffer
* @throws IOException any IOE.
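A sketch (not part of this patch) of a vectored read against the semantics documented above; it assumes the FileRange.createFileRange() factory of the vectored IO API, and the offsets, lengths, and class name are invented.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.util.functional.FutureIO;

public class ReadVectoredSketch {
  public static void readTwoRanges(FSDataInputStream in) throws IOException {
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 1024),
        FileRange.createFileRange(4096, 1024));
    in.readVectored(ranges, ByteBuffer::allocate);
    for (FileRange range : ranges) {
      // Each range's future completes with a buffer holding that range's data.
      ByteBuffer data = FutureIO.awaitFuture(range.getData());
      // ... consume data ...
    }
  }
}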
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
index f525c3cba78fe..2f4f93099b5c9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
@@ -57,6 +57,8 @@
import org.apache.hadoop.fs.impl.StoreImplementationUtils;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
import org.apache.hadoop.fs.statistics.IOStatisticsSource;
import org.apache.hadoop.fs.statistics.BufferedIOStatisticsOutputStream;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
@@ -156,11 +158,19 @@ class LocalFSFileInputStream extends FSInputStream implements
/** Reference to the bytes read counter for slightly faster counting. */
private final AtomicLong bytesRead;
+ /**
+ * Thread level IOStatistics aggregator to update in close().
+ */
+ private final IOStatisticsAggregator
+ ioStatisticsAggregator;
+
public LocalFSFileInputStream(Path f) throws IOException {
name = pathToFile(f);
fis = new FileInputStream(name);
bytesRead = ioStatistics.getCounterReference(
STREAM_READ_BYTES);
+ ioStatisticsAggregator =
+ IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator();
}
@Override
@@ -193,9 +203,13 @@ public boolean seekToNewSource(long targetPos) throws IOException {
@Override
public void close() throws IOException {
- fis.close();
- if (asyncChannel != null) {
- asyncChannel.close();
+ try {
+ fis.close();
+ if (asyncChannel != null) {
+ asyncChannel.close();
+ }
+ } finally {
+ ioStatisticsAggregator.aggregate(ioStatistics);
}
}
@@ -278,6 +292,7 @@ public boolean hasCapability(String capability) {
// new capabilities.
switch (capability.toLowerCase(Locale.ENGLISH)) {
case StreamCapabilities.IOSTATISTICS:
+ case StreamCapabilities.IOSTATISTICS_CONTEXT:
case StreamCapabilities.VECTOREDIO:
return true;
default:
@@ -407,9 +422,19 @@ final class LocalFSFileOutputStream extends OutputStream implements
STREAM_WRITE_EXCEPTIONS)
.build();
+ /**
+ * Thread level IOStatistics aggregator to update in close().
+ */
+ private final IOStatisticsAggregator
+ ioStatisticsAggregator;
+
private LocalFSFileOutputStream(Path f, boolean append,
FsPermission permission) throws IOException {
File file = pathToFile(f);
+ // store the aggregator before attempting any IO.
+ ioStatisticsAggregator =
+ IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator();
+
if (!append && permission == null) {
permission = FsPermission.getFileDefault();
}
@@ -436,10 +461,17 @@ private LocalFSFileOutputStream(Path f, boolean append,
}
/*
- * Just forward to the fos
+ * Close the fos; update the IOStatisticsContext.
*/
@Override
- public void close() throws IOException { fos.close(); }
+ public void close() throws IOException {
+ try {
+ fos.close();
+ } finally {
+ ioStatisticsAggregator.aggregate(ioStatistics);
+ }
+ }
+
@Override
public void flush() throws IOException { fos.flush(); }
@Override
@@ -485,6 +517,7 @@ public boolean hasCapability(String capability) {
// new capabilities.
switch (capability.toLowerCase(Locale.ENGLISH)) {
case StreamCapabilities.IOSTATISTICS:
+ case StreamCapabilities.IOSTATISTICS_CONTEXT:
return true;
default:
return StoreImplementationUtils.isProbeForSyncable(capability);
@@ -1293,4 +1326,9 @@ public boolean hasPathCapability(final Path path, final String capability)
return super.hasPathCapability(path, capability);
}
}
+
+ @VisibleForTesting
+ static void setUseDeprecatedFileStatus(boolean useDeprecatedFileStatus) {
+ RawLocalFileSystem.useDeprecatedFileStatus = useDeprecatedFileStatus;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
index d68ef505dc3fe..c925e50889d53 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
@@ -93,6 +93,12 @@ public interface StreamCapabilities {
*/
String ABORTABLE_STREAM = CommonPathCapabilities.ABORTABLE_STREAM;
+ /**
+ * Streams that support IOStatistics context and capture thread-level
+ * IOStatistics.
+ */
+ String IOSTATISTICS_CONTEXT = "fs.capability.iocontext.supported";
+
/**
* Capabilities that a stream can support and be queried for.
*/
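As a small illustration (not part of this patch), a caller can probe a stream for the new capability before relying on thread-level IOStatistics aggregation; the class and method names below are invented.

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.StreamCapabilities;

public class CapabilityProbeSketch {
  public static boolean capturesThreadIOStats(FSDataInputStream in) {
    return in.hasCapability(StreamCapabilities.IOSTATISTICS_CONTEXT);
  }
}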
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
index 64107f1a18f89..50cab7dc4ccf8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
@@ -30,6 +30,7 @@
import org.apache.hadoop.fs.impl.CombinedFileRange;
import org.apache.hadoop.util.Preconditions;
+import org.apache.hadoop.util.functional.Function4RaisingIOE;
/**
* Utility class which implements helper methods used
@@ -37,6 +38,8 @@
*/
public final class VectoredReadUtils {
+ private static final int TMP_BUFFER_MAX_SIZE = 64 * 1024;
+
/**
* Validate a single range.
* @param range file range.
@@ -114,7 +117,12 @@ private static void readNonByteBufferPositionedReadable(PositionedReadable strea
FileRange range,
ByteBuffer buffer) throws IOException {
if (buffer.isDirect()) {
- buffer.put(readInDirectBuffer(stream, range));
+ readInDirectBuffer(range.getLength(),
+ buffer,
+ (position, buffer1, offset, length) -> {
+ stream.readFully(position, buffer1, offset, length);
+ return null;
+ });
buffer.flip();
} else {
stream.readFully(range.getOffset(), buffer.array(),
@@ -122,13 +130,34 @@ private static void readNonByteBufferPositionedReadable(PositionedReadable strea
}
}
- private static byte[] readInDirectBuffer(PositionedReadable stream,
- FileRange range) throws IOException {
- // if we need to read data from a direct buffer and the stream doesn't
- // support it, we allocate a byte array to use.
- byte[] tmp = new byte[range.getLength()];
- stream.readFully(range.getOffset(), tmp, 0, tmp.length);
- return tmp;
+ /**
+ * Read bytes from stream into a byte buffer using an
+ * intermediate byte array.
+ * @param length number of bytes to read.
+ * @param buffer buffer to fill.
+ * @param operation operation to use for reading data.
+ * @throws IOException any IOE.
+ */
+ public static void readInDirectBuffer(int length,
+ ByteBuffer buffer,
+ Function4RaisingIOE<Integer, byte[], Integer, Integer, Void> operation) throws IOException {
+ if (length == 0) {
+ return;
+ }
+ int readBytes = 0;
+ int position = 0;
+ int tmpBufferMaxSize = Math.min(TMP_BUFFER_MAX_SIZE, length);
+ byte[] tmp = new byte[tmpBufferMaxSize];
+ while (readBytes < length) {
+ int currentLength = (readBytes + tmpBufferMaxSize) < length ?
+ tmpBufferMaxSize
+ : (length - readBytes);
+ operation.apply(position, tmp, 0, currentLength);
+ buffer.put(tmp, 0, currentLength);
+ position = position + currentLength;
+ readBytes = readBytes + currentLength;
+ }
}
/**
@@ -210,6 +239,7 @@ public static List extends FileRange> validateNonOverlappingAndReturnSortedRan
if (sortedRanges[i].getOffset() < prev.getOffset() + prev.getLength()) {
throw new UnsupportedOperationException("Overlapping ranges are not supported");
}
+ prev = sortedRanges[i];
}
return Arrays.asList(sortedRanges);
}
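A sketch (not part of this patch) of calling the readInDirectBuffer() helper added above through its four-argument callback; the stream handling and class name are invented for illustration.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.VectoredReadUtils;

public class DirectBufferReadSketch {
  public static ByteBuffer readRange(PositionedReadable stream,
      long offset, int length) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocateDirect(length);
    VectoredReadUtils.readInDirectBuffer(length, buffer,
        (position, tmp, tmpOffset, tmpLength) -> {
          // Copy each chunk from the file into the temporary byte array.
          stream.readFully(offset + position, tmp, tmpOffset, tmpLength);
          return null;
        });
    buffer.flip();
    return buffer;
  }
}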
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java
index b24bef2a816bf..16fe0da7c5a81 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs.impl;
+import java.lang.ref.WeakReference;
import java.util.function.Consumer;
import java.util.function.Function;
import javax.annotation.Nullable;
@@ -48,7 +49,17 @@ public long currentThreadId() {
}
public V setForCurrentThread(V newVal) {
- return put(currentThreadId(), newVal);
+ long id = currentThreadId();
+
+ // if the same object is already in the map, just return it.
+ WeakReference<V> ref = lookup(id);
+ // Reference value could be set to null. Thus, ref.get() could return
+ // null. Should be handled accordingly while using the returned value.
+ if (ref != null && ref.get() == newVal) {
+ return ref.get();
+ }
+
+ return put(id, newVal);
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
new file mode 100644
index 0000000000000..c18dc519188ba
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Provides functionality necessary for caching blocks of data read from FileSystem.
+ */
+public interface BlockCache extends Closeable {
+
+ /**
+ * Indicates whether the given block is in this cache.
+ *
+ * @param blockNumber the id of the given block.
+ * @return true if the given block is in this cache, false otherwise.
+ */
+ boolean containsBlock(int blockNumber);
+
+ /**
+ * Gets the blocks in this cache.
+ *
+ * @return the blocks in this cache.
+ */
+ Iterable<Integer> blocks();
+
+ /**
+ * Gets the number of blocks in this cache.
+ *
+ * @return the number of blocks in this cache.
+ */
+ int size();
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * @param blockNumber the id of the desired block.
+ * @param buffer contents of the desired block are copied to this buffer.
+ * @throws IOException if there is an error reading the given block.
+ */
+ void get(int blockNumber, ByteBuffer buffer) throws IOException;
+
+ /**
+ * Puts the given block in this cache.
+ *
+ * @param blockNumber the id of the given block.
+ * @param buffer contents of the given block to be added to this cache.
+ * @throws IOException if there is an error writing the given block.
+ */
+ void put(int blockNumber, ByteBuffer buffer) throws IOException;
+}
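A deliberately simple heap-backed sketch of this interface, for illustration only (the cache added by this patch is intended for blocks persisted outside the heap); the class name is invented.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.fs.impl.prefetch.BlockCache;

/** Toy in-memory BlockCache; illustrative only. */
public class InMemoryBlockCache implements BlockCache {
  private final Map<Integer, byte[]> blocks = new ConcurrentHashMap<>();

  @Override
  public boolean containsBlock(int blockNumber) {
    return blocks.containsKey(blockNumber);
  }

  @Override
  public Iterable<Integer> blocks() {
    return blocks.keySet();
  }

  @Override
  public int size() {
    return blocks.size();
  }

  @Override
  public void get(int blockNumber, ByteBuffer buffer) throws IOException {
    byte[] data = blocks.get(blockNumber);
    if (data == null) {
      throw new IOException("block " + blockNumber + " not cached");
    }
    buffer.put(data);
  }

  @Override
  public void put(int blockNumber, ByteBuffer buffer) throws IOException {
    // Copy the remaining bytes without disturbing the caller's buffer position.
    byte[] copy = new byte[buffer.remaining()];
    buffer.duplicate().get(copy);
    blocks.put(blockNumber, copy);
  }

  @Override
  public void close() {
    blocks.clear();
  }
}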
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java
new file mode 100644
index 0000000000000..ecb8bc7243be0
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkWithinRange;
+
+/**
+ * Holds information about blocks of data in a file.
+ */
+public final class BlockData {
+
+ // State of each block of data.
+ enum State {
+
+ /** Data is not yet ready to be read from this block (still being prefetched). */
+ NOT_READY,
+
+ /** A read of this block has been enqueued in the prefetch queue. */
+ QUEUED,
+
+ /** Data is ready to be read from this block. */
+ READY,
+
+ /** This block has been cached in the local disk cache. */
+ CACHED
+ }
+
+ /**
+ * State of all blocks in a file.
+ */
+ private State[] state;
+
+ /**
+ * The size of a file.
+ */
+ private final long fileSize;
+
+ /**
+ * The file is divided into blocks of this size.
+ */
+ private final int blockSize;
+
+ /**
+ * The file has these many blocks.
+ */
+ private final int numBlocks;
+
+ /**
+ * Constructs an instance of {@link BlockData}.
+ * @param fileSize the size of a file.
+ * @param blockSize the file is divided into blocks of this size.
+ * @throws IllegalArgumentException if fileSize is negative.
+ * @throws IllegalArgumentException if blockSize is zero or negative.
+ */
+ public BlockData(long fileSize, int blockSize) {
+ checkNotNegative(fileSize, "fileSize");
+ if (fileSize == 0) {
+ checkNotNegative(blockSize, "blockSize");
+ } else {
+ checkPositiveInteger(blockSize, "blockSize");
+ }
+
+ this.fileSize = fileSize;
+ this.blockSize = blockSize;
+ this.numBlocks =
+ (fileSize == 0)
+ ? 0
+ : ((int) (fileSize / blockSize)) + (fileSize % blockSize > 0
+ ? 1
+ : 0);
+ this.state = new State[this.numBlocks];
+ for (int b = 0; b < this.numBlocks; b++) {
+ setState(b, State.NOT_READY);
+ }
+ }
+
+ /**
+ * Gets the size of each block.
+ * @return the size of each block.
+ */
+ public int getBlockSize() {
+ return blockSize;
+ }
+
+ /**
+ * Gets the size of the associated file.
+ * @return the size of the associated file.
+ */
+ public long getFileSize() {
+ return fileSize;
+ }
+
+ /**
+ * Gets the number of blocks in the associated file.
+ * @return the number of blocks in the associated file.
+ */
+ public int getNumBlocks() {
+ return numBlocks;
+ }
+
+ /**
+ * Indicates whether the given block is the last block in the associated file.
+ * @param blockNumber the id of the desired block.
+ * @return true if the given block is the last block in the associated file, false otherwise.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public boolean isLastBlock(int blockNumber) {
+ if (fileSize == 0) {
+ return false;
+ }
+
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return blockNumber == (numBlocks - 1);
+ }
+
+ /**
+ * Gets the id of the block that contains the given absolute offset.
+ * @param offset the absolute offset to check.
+ * @return the id of the block that contains the given absolute offset.
+ * @throws IllegalArgumentException if offset is invalid.
+ */
+ public int getBlockNumber(long offset) {
+ throwIfInvalidOffset(offset);
+
+ return (int) (offset / blockSize);
+ }
+
+ /**
+ * Gets the size of the given block.
+ * @param blockNumber the id of the desired block.
+ * @return the size of the given block.
+ */
+ public int getSize(int blockNumber) {
+ if (fileSize == 0) {
+ return 0;
+ }
+
+ if (isLastBlock(blockNumber)) {
+ return (int) (fileSize - (((long) blockSize) * (numBlocks - 1)));
+ } else {
+ return blockSize;
+ }
+ }
+
+ /**
+ * Indicates whether the given absolute offset is valid.
+ * @param offset absolute offset in the file.
+ * @return true if the given absolute offset is valid, false otherwise.
+ */
+ public boolean isValidOffset(long offset) {
+ return (offset >= 0) && (offset < fileSize);
+ }
+
+ /**
+ * Gets the start offset of the given block.
+ * @param blockNumber the id of the given block.
+ * @return the start offset of the given block.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public long getStartOffset(int blockNumber) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return blockNumber * (long) blockSize;
+ }
+
+ /**
+ * Gets the relative offset corresponding to the given block and the absolute offset.
+ * @param blockNumber the id of the given block.
+ * @param offset absolute offset in the file.
+ * @return the relative offset corresponding to the given block and the absolute offset.
+ * @throws IllegalArgumentException if either blockNumber or offset is invalid.
+ */
+ public int getRelativeOffset(int blockNumber, long offset) {
+ throwIfInvalidOffset(offset);
+
+ return (int) (offset - getStartOffset(blockNumber));
+ }
+
+ /**
+ * Gets the state of the given block.
+ * @param blockNumber the id of the given block.
+ * @return the state of the given block.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public State getState(int blockNumber) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return state[blockNumber];
+ }
+
+ /**
+ * Sets the state of the given block to the given value.
+ * @param blockNumber the id of the given block.
+ * @param blockState the target state.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public void setState(int blockNumber, State blockState) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ state[blockNumber] = blockState;
+ }
+
+ // Debug helper.
+ public String getStateString() {
+ StringBuilder sb = new StringBuilder();
+ int blockNumber = 0;
+ while (blockNumber < numBlocks) {
+ State tstate = getState(blockNumber);
+ int endBlockNumber = blockNumber;
+ while ((endBlockNumber < numBlocks) && (getState(endBlockNumber)
+ == tstate)) {
+ endBlockNumber++;
+ }
+ sb.append(
+ String.format("[%03d ~ %03d] %s%n", blockNumber, endBlockNumber - 1,
+ tstate));
+ blockNumber = endBlockNumber;
+ }
+ return sb.toString();
+ }
+
+ private void throwIfInvalidBlockNumber(int blockNumber) {
+ checkWithinRange(blockNumber, "blockNumber", 0, numBlocks - 1);
+ }
+
+ private void throwIfInvalidOffset(long offset) {
+ checkWithinRange(offset, "offset", 0, fileSize - 1);
+ }
+}
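A worked example (not part of this patch) of the block arithmetic above, using invented sizes: a 10 MB file with 4 MB blocks splits into three blocks, the last holding the remaining 2 MB.

import org.apache.hadoop.fs.impl.prefetch.BlockData;

public class BlockDataSketch {
  public static void main(String[] args) {
    BlockData blockData = new BlockData(10 * 1024 * 1024, 4 * 1024 * 1024);
    System.out.println(blockData.getNumBlocks());             // 3
    System.out.println(blockData.getSize(2));                 // 2097152 (partial last block)
    System.out.println(blockData.getBlockNumber(5_000_000L)); // 1
    System.out.println(blockData.getStartOffset(1));          // 4194304
  }
}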
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java
new file mode 100644
index 0000000000000..45f0aabe7dcd9
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+
+/**
+ * Provides read access to the underlying file one block at a time.
+ *
+ * This class is the simplest form of a {@code BlockManager} that does
+ * perform prefetching or caching.
+ */
+public abstract class BlockManager implements Closeable {
+
+ /**
+ * Information about each block of the underlying file.
+ */
+ private final BlockData blockData;
+
+ /**
+ * Constructs an instance of {@code BlockManager}.
+ *
+ * @param blockData information about each block of the underlying file.
+ *
+ * @throws IllegalArgumentException if blockData is null.
+ */
+ public BlockManager(BlockData blockData) {
+ checkNotNull(blockData, "blockData");
+
+ this.blockData = blockData;
+ }
+
+ /**
+ * Gets block data information.
+ *
+ * @return instance of {@code BlockData}.
+ */
+ public BlockData getBlockData() {
+ return blockData;
+ }
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * The entire block is read into memory and returned as a {@code BufferData}.
+ * The blocks are treated as a limited resource and must be released when
+ * one is done reading them.
+ *
+ * @param blockNumber the number of the block to be read and returned.
+ * @return {@code BufferData} having data from the given block.
+ *
+ * @throws IOException if there is an error reading the given block.
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ public BufferData get(int blockNumber) throws IOException {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ int size = blockData.getSize(blockNumber);
+ ByteBuffer buffer = ByteBuffer.allocate(size);
+ long startOffset = blockData.getStartOffset(blockNumber);
+ read(buffer, startOffset, size);
+ buffer.flip();
+ return new BufferData(blockNumber, buffer);
+ }
+
+ /**
+ * Reads into the given {@code buffer} {@code size} bytes from the underlying file
+ * starting at {@code startOffset}.
+ *
+ * @param buffer the buffer to read data in to.
+ * @param startOffset the offset at which reading starts.
+ * @param size the number of bytes to read.
+ * @return number of bytes read.
+ * @throws IOException if there is an error reading from the underlying file.
+ */
+ public abstract int read(ByteBuffer buffer, long startOffset, int size) throws IOException;
+
+ /**
+ * Releases resources allocated to the given block.
+ *
+ * @param data the {@code BufferData} to release.
+ *
+ * @throws IllegalArgumentException if data is null.
+ */
+ public void release(BufferData data) {
+ checkNotNull(data, "data");
+
+ // Do nothing because we allocate a new buffer each time.
+ }
+
+ /**
+ * Requests optional prefetching of the given block.
+ *
+ * @param blockNumber the id of the block to prefetch.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ public void requestPrefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ // Do nothing because we do not support prefetches.
+ }
+
+ /**
+ * Requests cancellation of any previously issued prefetch requests.
+ */
+ public void cancelPrefetches() {
+ // Do nothing because we do not support prefetches.
+ }
+
+ /**
+ * Requests that the given block should be copied to the cache. Optional operation.
+ *
+ * @param data the {@code BufferData} instance to optionally cache.
+ */
+ public void requestCaching(BufferData data) {
+ // Do nothing because we do not support caching.
+ }
+
+ @Override
+ public void close() {
+ }
+}
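A minimal sketch (not part of this patch) of a concrete BlockManager that reads blocks straight from a PositionedReadable stream, with no prefetching or caching; the class name and wiring are invented.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.impl.prefetch.BlockData;
import org.apache.hadoop.fs.impl.prefetch.BlockManager;

public class SimpleBlockManager extends BlockManager {
  private final PositionedReadable reader;

  public SimpleBlockManager(BlockData blockData, PositionedReadable reader) {
    super(blockData);
    this.reader = reader;
  }

  @Override
  public int read(ByteBuffer buffer, long startOffset, int size)
      throws IOException {
    // get(blockNumber) in the base class calls this with the block's
    // start offset and size, then flips the buffer.
    byte[] bytes = new byte[size];
    reader.readFully(startOffset, bytes, 0, size);
    buffer.put(bytes);
    return size;
  }
}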
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java
new file mode 100644
index 0000000000000..2744334a3bd7a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.DoubleSummaryStatistics;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+
+/**
+ * Block level operations performed on a file.
+ * This class is meant to be used by {@code BlockManager}.
+ * It is separated out in its own file due to its size.
+ *
+ * This class is used for debugging/logging. Calls to this class
+ * can be safely removed without affecting the overall operation.
+ */
+public final class BlockOperations {
+ private static final Logger LOG = LoggerFactory.getLogger(BlockOperations.class);
+
+ /**
+ * Operation kind.
+ */
+ public enum Kind {
+ UNKNOWN("??", "unknown", false),
+ CANCEL_PREFETCHES("CP", "cancelPrefetches", false),
+ CLOSE("CX", "close", false),
+ CACHE_PUT("C+", "putC", true),
+ GET_CACHED("GC", "getCached", true),
+ GET_PREFETCHED("GP", "getPrefetched", true),
+ GET_READ("GR", "getRead", true),
+ PREFETCH("PF", "prefetch", true),
+ RELEASE("RL", "release", true),
+ REQUEST_CACHING("RC", "requestCaching", true),
+ REQUEST_PREFETCH("RP", "requestPrefetch", true);
+
+ private String shortName;
+ private String name;
+ private boolean hasBlock;
+
+ Kind(String shortName, String name, boolean hasBlock) {
+ this.shortName = shortName;
+ this.name = name;
+ this.hasBlock = hasBlock;
+ }
+
+ private static Map<String, Kind> shortNameToKind = new HashMap<>();
+
+ public static Kind fromShortName(String shortName) {
+ if (shortNameToKind.isEmpty()) {
+ for (Kind kind : Kind.values()) {
+ shortNameToKind.put(kind.shortName, kind);
+ }
+ }
+ return shortNameToKind.get(shortName);
+ }
+ }
+
+ public static class Operation {
+ private final Kind kind;
+ private final int blockNumber;
+ private final long timestamp;
+
+ public Operation(Kind kind, int blockNumber) {
+ this.kind = kind;
+ this.blockNumber = blockNumber;
+ this.timestamp = System.nanoTime();
+ }
+
+ public Kind getKind() {
+ return kind;
+ }
+
+ public int getBlockNumber() {
+ return blockNumber;
+ }
+
+ public long getTimestamp() {
+ return timestamp;
+ }
+
+ public void getSummary(StringBuilder sb) {
+ if (kind.hasBlock) {
+ sb.append(String.format("%s(%d)", kind.shortName, blockNumber));
+ } else {
+ sb.append(String.format("%s", kind.shortName));
+ }
+ }
+
+ public String getDebugInfo() {
+ if (kind.hasBlock) {
+ return String.format("--- %s(%d)", kind.name, blockNumber);
+ } else {
+ return String.format("... %s()", kind.name);
+ }
+ }
+ }
+
+ public static class End extends Operation {
+ private Operation op;
+
+ public End(Operation op) {
+ super(op.kind, op.blockNumber);
+ this.op = op;
+ }
+
+ @Override
+ public void getSummary(StringBuilder sb) {
+ sb.append("E");
+ super.getSummary(sb);
+ }
+
+ @Override
+ public String getDebugInfo() {
+ return "***" + super.getDebugInfo().substring(3);
+ }
+
+ public double duration() {
+ return (getTimestamp() - op.getTimestamp()) / 1e9;
+ }
+ }
+
+ private ArrayList<Operation> ops;
+ private boolean debugMode;
+
+ public BlockOperations() {
+ this.ops = new ArrayList<>();
+ }
+
+ public synchronized void setDebug(boolean state) {
+ debugMode = state;
+ }
+
+ private synchronized Operation add(Operation op) {
+ if (debugMode) {
+ LOG.info(op.getDebugInfo());
+ }
+ ops.add(op);
+ return op;
+ }
+
+ public Operation getPrefetched(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_PREFETCHED, blockNumber));
+ }
+
+ public Operation getCached(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_CACHED, blockNumber));
+ }
+
+ public Operation getRead(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_READ, blockNumber));
+ }
+
+ public Operation release(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.RELEASE, blockNumber));
+ }
+
+ public Operation requestPrefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.REQUEST_PREFETCH, blockNumber));
+ }
+
+ public Operation prefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.PREFETCH, blockNumber));
+ }
+
+ public Operation cancelPrefetches() {
+ return add(new Operation(Kind.CANCEL_PREFETCHES, -1));
+ }
+
+ public Operation close() {
+ return add(new Operation(Kind.CLOSE, -1));
+ }
+
+ public Operation requestCaching(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.REQUEST_CACHING, blockNumber));
+ }
+
+ public Operation addToCache(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.CACHE_PUT, blockNumber));
+ }
+
+ public Operation end(Operation op) {
+ return add(new End(op));
+ }
+
+ private static void append(StringBuilder sb, String format, Object... args) {
+ sb.append(String.format(format, args));
+ }
+
+ public synchronized String getSummary(boolean showDebugInfo) {
+ StringBuilder sb = new StringBuilder();
+ for (Operation op : ops) {
+ if (op != null) {
+ if (showDebugInfo) {
+ sb.append(op.getDebugInfo());
+ sb.append("\n");
+ } else {
+ op.getSummary(sb);
+ sb.append(";");
+ }
+ }
+ }
+
+ sb.append("\n");
+ getDurationInfo(sb);
+
+ return sb.toString();
+ }
+
+ public synchronized void getDurationInfo(StringBuilder sb) {
+ Map<Kind, DoubleSummaryStatistics> durations = new HashMap<>();
+ for (Operation op : ops) {
+ if (op instanceof End) {
+ End endOp = (End) op;
+ DoubleSummaryStatistics stats = durations.get(endOp.getKind());
+ if (stats == null) {
+ stats = new DoubleSummaryStatistics();
+ durations.put(endOp.getKind(), stats);
+ }
+ stats.accept(endOp.duration());
+ }
+ }
+
+ List<Kind> kinds = Arrays.asList(
+ Kind.GET_CACHED,
+ Kind.GET_PREFETCHED,
+ Kind.GET_READ,
+ Kind.CACHE_PUT,
+ Kind.PREFETCH,
+ Kind.REQUEST_CACHING,
+ Kind.REQUEST_PREFETCH,
+ Kind.CANCEL_PREFETCHES,
+ Kind.RELEASE,
+ Kind.CLOSE
+ );
+
+ for (Kind kind : kinds) {
+ append(sb, "%-18s : ", kind);
+ DoubleSummaryStatistics stats = durations.get(kind);
+ if (stats == null) {
+ append(sb, "--\n");
+ } else {
+ append(
+ sb,
+ "#ops = %3d, total = %5.1f, min: %3.1f, avg: %3.1f, max: %3.1f\n",
+ stats.getCount(),
+ stats.getSum(),
+ stats.getMin(),
+ stats.getAverage(),
+ stats.getMax());
+ }
+ }
+ }
+
+ public synchronized void analyze(StringBuilder sb) {
+ Map<Integer, List<Operation>> blockOps = new HashMap<>();
+
+ // Group-by block number.
+ for (Operation op : ops) {
+ if (op.blockNumber < 0) {
+ continue;
+ }
+
+ List<Operation> perBlockOps;
+ if (!blockOps.containsKey(op.blockNumber)) {
+ perBlockOps = new ArrayList<>();
+ blockOps.put(op.blockNumber, perBlockOps);
+ }
+
+ perBlockOps = blockOps.get(op.blockNumber);
+ perBlockOps.add(op);
+ }
+
+ List<Integer> prefetchedNotUsed = new ArrayList<>();
+ List<Integer> cachedNotUsed = new ArrayList<>();
+
+ for (Map.Entry<Integer, List<Operation>> entry : blockOps.entrySet()) {
+ Integer blockNumber = entry.getKey();
+ List<Operation> perBlockOps = entry.getValue();
+ Map<Kind, Integer> kindCounts = new HashMap<>();
+ Map<Kind, Integer> endKindCounts = new HashMap<>();
+
+ for (Operation op : perBlockOps) {
+ if (op instanceof End) {
+ int endCount = endKindCounts.getOrDefault(op.kind, 0) + 1;
+ endKindCounts.put(op.kind, endCount);
+ } else {
+ int count = kindCounts.getOrDefault(op.kind, 0) + 1;
+ kindCounts.put(op.kind, count);
+ }
+ }
+
+ for (Kind kind : kindCounts.keySet()) {
+ int count = kindCounts.getOrDefault(kind, 0);
+ int endCount = endKindCounts.getOrDefault(kind, 0);
+ if (count != endCount) {
+ append(sb, "[%d] %s : #ops(%d) != #end-ops(%d)\n", blockNumber, kind, count, endCount);
+ }
+
+ if (count > 1) {
+ append(sb, "[%d] %s = %d\n", blockNumber, kind, count);
+ }
+ }
+
+ int prefetchCount = kindCounts.getOrDefault(Kind.PREFETCH, 0);
+ int getPrefetchedCount = kindCounts.getOrDefault(Kind.GET_PREFETCHED, 0);
+ if ((prefetchCount > 0) && (getPrefetchedCount < prefetchCount)) {
+ prefetchedNotUsed.add(blockNumber);
+ }
+
+ int cacheCount = kindCounts.getOrDefault(Kind.CACHE_PUT, 0);
+ int getCachedCount = kindCounts.getOrDefault(Kind.GET_CACHED, 0);
+ if ((cacheCount > 0) && (getCachedCount < cacheCount)) {
+ cachedNotUsed.add(blockNumber);
+ }
+ }
+
+ if (!prefetchedNotUsed.isEmpty()) {
+ append(sb, "Prefetched but not used: %s\n", getIntList(prefetchedNotUsed));
+ }
+
+ if (!cachedNotUsed.isEmpty()) {
+ append(sb, "Cached but not used: %s\n", getIntList(cachedNotUsed));
+ }
+ }
+
+ private static String getIntList(Iterable<Integer> nums) {
+ List<String> numList = new ArrayList<>();
+ for (Integer n : nums) {
+ numList.add(n.toString());
+ }
+ return String.join(", ", numList);
+ }
+
+ public static BlockOperations fromSummary(String summary) {
+ BlockOperations ops = new BlockOperations();
+ ops.setDebug(true);
+ Pattern blockOpPattern = Pattern.compile("([A-Z+]+)(\\(([0-9]+)?\\))?");
+ String[] tokens = summary.split(";");
+ for (String token : tokens) {
+ Matcher matcher = blockOpPattern.matcher(token);
+ if (!matcher.matches()) {
+ String message = String.format("Unknown summary format: %s", token);
+ throw new IllegalArgumentException(message);
+ }
+
+ String shortName = matcher.group(1);
+ String blockNumberStr = matcher.group(3);
+ int blockNumber = (blockNumberStr == null) ? -1 : Integer.parseInt(blockNumberStr);
+ Kind kind = Kind.fromShortName(shortName);
+ Kind endKind = null;
+ if (kind == null) {
+ if (shortName.charAt(0) == 'E') {
+ endKind = Kind.fromShortName(shortName.substring(1));
+ }
+ }
+
+ if (kind == null && endKind == null) {
+ String message = String.format("Unknown short name: %s (token = %s)", shortName, token);
+ throw new IllegalArgumentException(message);
+ }
+
+ if (kind != null) {
+ ops.add(new Operation(kind, blockNumber));
+ } else {
+ Operation op = null;
+ for (int i = ops.ops.size() - 1; i >= 0; i--) {
+ op = ops.ops.get(i);
+ if ((op.blockNumber == blockNumber) && (op.kind == endKind) && !(op instanceof End)) {
+ ops.add(new End(op));
+ break;
+ }
+ }
+
+ if (op == null) {
+ LOG.warn("Start op not found: {}({})", endKind, blockNumber);
+ }
+ }
+ }
+
+ return ops;
+ }
+}
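As an illustration (not part of this patch) of the short-name trace that getSummary() produces and fromSummary() parses; the specific sequence of operations here is invented.

import org.apache.hadoop.fs.impl.prefetch.BlockOperations;

public class BlockOperationsSketch {
  public static void main(String[] args) {
    BlockOperations ops = new BlockOperations();
    BlockOperations.Operation read = ops.getRead(0);
    ops.requestCaching(0);
    ops.end(read);
    // Prints a compact trace such as "GR(0);RC(0);EGR(0);"
    // followed by per-kind duration statistics.
    System.out.println(ops.getSummary(false));
  }
}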
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java
new file mode 100644
index 0000000000000..a871f8237729f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+
+/**
+ * Manages a fixed pool of resources.
+ *
+ * Avoids creating a new resource if a previously created instance is already available.
+ */
+public abstract class BoundedResourcePool<T> extends ResourcePool<T> {
+ /**
+ * The size of this pool. Fixed at creation time.
+ */
+ private final int size;
+
+ /**
+ * Items currently available in the pool.
+ */
+ private ArrayBlockingQueue<T> items;
+
+ /**
+ * Items that have been created so far (regardless of whether they are currently available).
+ */
+ private Set<T> createdItems;
+
+ /**
+ * Constructs a resource pool of the given size.
+ *
+ * @param size the size of this pool. Cannot be changed post creation.
+ *
+ * @throws IllegalArgumentException if size is zero or negative.
+ */
+ public BoundedResourcePool(int size) {
+ Validate.checkPositiveInteger(size, "size");
+
+ this.size = size;
+ this.items = new ArrayBlockingQueue<>(size);
+
+ // The created items are identified based on their object reference.
+ this.createdItems = Collections.newSetFromMap(new IdentityHashMap<T, Boolean>());
+ }
+
+ /**
+ * Acquires a resource, blocking if necessary until one becomes available.
+ */
+ @Override
+ public T acquire() {
+ return this.acquireHelper(true);
+ }
+
+ /**
+ * Acquires a resource only if one is immediately available; otherwise returns null without blocking.
+ */
+ @Override
+ public T tryAcquire() {
+ return this.acquireHelper(false);
+ }
+
+ /**
+ * Releases a previously acquired resource.
+ *
+ * @throws IllegalArgumentException if item is null.
+ */
+ @Override
+ public void release(T item) {
+ checkNotNull(item, "item");
+
+ synchronized (createdItems) {
+ if (!createdItems.contains(item)) {
+ throw new IllegalArgumentException("This item is not a part of this pool");
+ }
+ }
+
+ // Return if this item was released earlier.
+ // We cannot use items.contains() because that check is not based on reference equality.
+ for (T entry : items) {
+ if (entry == item) {
+ return;
+ }
+ }
+
+ try {
+ items.put(item);
+ } catch (InterruptedException e) {
+ throw new IllegalStateException("release() should never block", e);
+ }
+ }
+
+ @Override
+ public synchronized void close() {
+ for (T item : createdItems) {
+ close(item);
+ }
+
+ items.clear();
+ items = null;
+
+ createdItems.clear();
+ createdItems = null;
+ }
+
+ /**
+ * Derived classes may implement a way to cleanup each item.
+ */
+ @Override
+ protected synchronized void close(T item) {
+ // Do nothing in this class. Allow overriding classes to take any cleanup action.
+ }
+
+ /**
+ * Number of items created so far. Mostly for testing purposes.
+ * @return the count.
+ */
+ public int numCreated() {
+ synchronized (createdItems) {
+ return createdItems.size();
+ }
+ }
+
+ /**
+ * Number of items available to be acquired. Mostly for testing purposes.
+ * @return the number available.
+ */
+ public synchronized int numAvailable() {
+ return (size - numCreated()) + items.size();
+ }
+
+ // For debugging purposes.
+ @Override
+ public synchronized String toString() {
+ return String.format(
+ "size = %d, #created = %d, #in-queue = %d, #available = %d",
+ size, numCreated(), items.size(), numAvailable());
+ }
+
+ /**
+ * Derived classes must implement a way to create an instance of a resource.
+ */
+ protected abstract T createNew();
+
+ private T acquireHelper(boolean canBlock) {
+
+ // Prefer reusing an item if one is available.
+ // That avoids unnecessarily creating new instances.
+ T result = items.poll();
+ if (result != null) {
+ return result;
+ }
+
+ synchronized (createdItems) {
+ // Create a new instance if allowed by the capacity of this pool.
+ if (createdItems.size() < size) {
+ T item = createNew();
+ createdItems.add(item);
+ return item;
+ }
+ }
+
+ if (canBlock) {
+ try {
+ // Block for an instance to be available.
+ return items.take();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+}
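A sketch (not part of this patch) of one plausible concrete pool: a fixed set of direct ByteBuffers; the class name and sizes are invented. A caller would acquire() a buffer, use it, and release() it back to the pool.

import java.nio.ByteBuffer;

import org.apache.hadoop.fs.impl.prefetch.BoundedResourcePool;

/** Fixed-size pool of direct buffers; illustrative only. */
public class DirectBufferPool extends BoundedResourcePool<ByteBuffer> {
  private final int bufferSize;

  public DirectBufferPool(int size, int bufferSize) {
    super(size);
    this.bufferSize = bufferSize;
  }

  @Override
  protected ByteBuffer createNew() {
    // Called only while fewer than 'size' buffers exist.
    return ByteBuffer.allocateDirect(bufferSize);
  }
}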
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java
new file mode 100644
index 0000000000000..de68269ab700c
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Future;
+import java.util.zip.CRC32;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Holds the state of a ByteBuffer that is in use by {@code CachingBlockManager}.
+ *
+ * This class is not meant to be of general use. It exists in its own file due to its size.
+ * We use the term block and buffer interchangeably in this file because one buffer
+ * holds exactly one block of data.
+ *
+ * Holding all of the state associated with a block allows us to validate and control
+ * state transitions in a synchronized fashion.
+ */
+public final class BufferData {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BufferData.class);
+
+ public enum State {
+ /**
+ * Unknown / invalid state.
+ */
+ UNKNOWN,
+
+ /**
+ * Buffer has been acquired but has no data.
+ */
+ BLANK,
+
+ /**
+ * This block is being prefetched.
+ */
+ PREFETCHING,
+
+ /**
+ * This block is being added to the local cache.
+ */
+ CACHING,
+
+ /**
+ * This block has data and is ready to be read.
+ */
+ READY,
+
+ /**
+ * This block is no longer in use and should not be used once in this state.
+ */
+ DONE
+ }
+
+ /**
+ * Number of the block associated with this buffer.
+ */
+ private final int blockNumber;
+
+ /**
+ * The buffer associated with this block.
+ */
+ private ByteBuffer buffer;
+
+ /**
+ * Current state of this block.
+ */
+ private volatile State state;
+
+ /**
+ * Future of the action being performed on this block (eg, prefetching or caching).
+ */
+ private Future<Void> action;
+
+ /**
+ * Checksum of the buffer contents once in READY state.
+ */
+ private long checksum = 0;
+
+ /**
+ * Constructs an instance of this class.
+ *
+ * @param blockNumber Number of the block associated with this buffer.
+ * @param buffer The buffer associated with this block.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ * @throws IllegalArgumentException if buffer is null.
+ */
+ public BufferData(int blockNumber, ByteBuffer buffer) {
+ Validate.checkNotNegative(blockNumber, "blockNumber");
+ Validate.checkNotNull(buffer, "buffer");
+
+ this.blockNumber = blockNumber;
+ this.buffer = buffer;
+ this.state = State.BLANK;
+ }
+
+ /**
+ * Gets the id of this block.
+ *
+ * @return the id of this block.
+ */
+ public int getBlockNumber() {
+ return this.blockNumber;
+ }
+
+ /**
+ * Gets the buffer associated with this block.
+ *
+ * @return the buffer associated with this block.
+ */
+ public ByteBuffer getBuffer() {
+ return this.buffer;
+ }
+
+ /**
+ * Gets the state of this block.
+ *
+ * @return the state of this block.
+ */
+ public State getState() {
+ return this.state;
+ }
+
+ /**
+ * Gets the checksum of data in this block.
+ *
+ * @return the checksum of data in this block.
+ */
+ public long getChecksum() {
+ return this.checksum;
+ }
+
+ /**
+ * Computes CRC32 checksum of the given buffer's contents.
+ *
+ * @param buffer the buffer whose content's checksum is to be computed.
+ * @return the computed checksum.
+ */
+ public static long getChecksum(ByteBuffer buffer) {
+ ByteBuffer tempBuffer = buffer.duplicate();
+ tempBuffer.rewind();
+ CRC32 crc32 = new CRC32();
+ crc32.update(tempBuffer);
+ return crc32.getValue();
+ }
+
+ public synchronized Future<Void> getActionFuture() {
+ return this.action;
+ }
+
+ /**
+ * Indicates that a prefetch operation is in progress.
+ *
+ * @param actionFuture the {@code Future} of a prefetch action.
+ *
+ * @throws IllegalArgumentException if actionFuture is null.
+ */
+ public synchronized void setPrefetch(Future<Void> actionFuture) {
+ Validate.checkNotNull(actionFuture, "actionFuture");
+
+ this.updateState(State.PREFETCHING, State.BLANK);
+ this.action = actionFuture;
+ }
+
+ /**
+ * Indicates that a caching operation is in progress.
+ *
+ * @param actionFuture the {@code Future} of a caching action.
+ *
+ * @throws IllegalArgumentException if actionFuture is null.
+ */
+ public synchronized void setCaching(Future<Void> actionFuture) {
+ Validate.checkNotNull(actionFuture, "actionFuture");
+
+ this.throwIfStateIncorrect(State.PREFETCHING, State.READY);
+ this.state = State.CACHING;
+ this.action = actionFuture;
+ }
+
+ /**
+ * Marks the completion of reading data into the buffer.
+ * The buffer cannot be modified once in this state.
+ *
+ * @param expectedCurrentState the collection of states from which transition to READY is allowed.
+ */
+ public synchronized void setReady(State... expectedCurrentState) {
+ if (this.checksum != 0) {
+ throw new IllegalStateException("Checksum cannot be changed once set");
+ }
+
+ this.buffer = this.buffer.asReadOnlyBuffer();
+ this.checksum = getChecksum(this.buffer);
+ this.buffer.rewind();
+ this.updateState(State.READY, expectedCurrentState);
+ }
+
+ /**
+ * Indicates that this block is no longer of use and can be reclaimed.
+ */
+ public synchronized void setDone() {
+ if (this.checksum != 0) {
+ if (getChecksum(this.buffer) != this.checksum) {
+ throw new IllegalStateException("checksum changed after setReady()");
+ }
+ }
+ this.state = State.DONE;
+ this.action = null;
+ }
+
+ /**
+ * Updates the current state to the specified value.
+ * Asserts that the current state is as expected.
+ * @param newState the state to transition to.
+ * @param expectedCurrentState the collection of states from which
+ * transition to {@code newState} is allowed.
+ *
+ * @throws IllegalArgumentException if newState is null.
+ * @throws IllegalArgumentException if expectedCurrentState is null.
+ */
+ public synchronized void updateState(State newState,
+ State... expectedCurrentState) {
+ Validate.checkNotNull(newState, "newState");
+ Validate.checkNotNull(expectedCurrentState, "expectedCurrentState");
+
+ this.throwIfStateIncorrect(expectedCurrentState);
+ this.state = newState;
+ }
+
+ /**
+ * Helper that asserts the current state is one of the expected values.
+ *
+ * @param states the collection of allowed states.
+ *
+ * @throws IllegalArgumentException if states is null.
+ */
+ public void throwIfStateIncorrect(State... states) {
+ Validate.checkNotNull(states, "states");
+
+ if (this.stateEqualsOneOf(states)) {
+ return;
+ }
+
+ List<String> statesStr = new ArrayList<>();
+ for (State s : states) {
+ statesStr.add(s.toString());
+ }
+
+ String message = String.format(
+ "Expected buffer state to be '%s' but found: %s",
+ String.join(" or ", statesStr), this);
+ throw new IllegalStateException(message);
+ }
+
+ public boolean stateEqualsOneOf(State... states) {
+ State currentState = this.state;
+
+ for (State s : states) {
+ if (currentState == s) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public String toString() {
+
+ return String.format(
+ "[%03d] id: %03d, %s: buf: %s, checksum: %d, future: %s",
+ this.blockNumber,
+ System.identityHashCode(this),
+ this.state,
+ this.getBufferStr(this.buffer),
+ this.checksum,
+ this.getFutureStr(this.action));
+ }
+
+ private String getFutureStr(Future<Void> f) {
+ if (f == null) {
+ return "--";
+ } else {
+ return f.isDone() ? "done" : "not done";
+ }
+ }
+
+ private String getBufferStr(ByteBuffer buf) {
+ if (buf == null) {
+ return "--";
+ } else {
+ return String.format(
+ "(id = %d, pos = %d, lim = %d)",
+ System.identityHashCode(buf),
+ buf.position(), buf.limit());
+ }
+ }
+}
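A short sketch of the state machine described above (BLANK -> PREFETCHING -> READY -> CACHING -> DONE). The completed futures stand in for real prefetch and cache-put tasks; in the patch those futures come from ExecutorServiceFuturePool, so this is illustrative only.

    import java.nio.ByteBuffer;
    import java.util.concurrent.CompletableFuture;

    import org.apache.hadoop.fs.impl.prefetch.BufferData;

    public class BufferDataStateExample {
      public static void main(String[] args) {
        BufferData data = new BufferData(0, ByteBuffer.allocate(16));
        System.out.println(data.getState());          // BLANK

        // A completed future stands in for a real prefetch task submitted to an executor.
        data.setPrefetch(CompletableFuture.<Void>completedFuture(null));
        System.out.println(data.getState());          // PREFETCHING

        data.getBuffer().put((byte) 42);               // fill the buffer while "prefetching"
        data.setReady(BufferData.State.PREFETCHING);   // buffer becomes read-only, checksum recorded
        System.out.println(data.getState());          // READY

        data.setCaching(CompletableFuture.<Void>completedFuture(null));
        System.out.println(data.getState());          // CACHING

        data.setDone();                                // re-verifies the checksum recorded at setReady()
        System.out.println(data.getState());          // DONE
      }
    }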
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java
new file mode 100644
index 0000000000000..189357f6bd04f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Future;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkState;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+import static org.apache.hadoop.util.Preconditions.checkNotNull;
+
+/**
+ * Manages a fixed pool of {@code ByteBuffer} instances.
+ *
+ * Avoids creating a new buffer if a previously created buffer is already available.
+ */
+public class BufferPool implements Closeable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BufferPool.class);
+
+ /**
+ * Max number of buffers in this pool.
+ */
+ private final int size;
+
+ /**
+ * Size in bytes of each buffer.
+ */
+ private final int bufferSize;
+
+ /*
+ Invariants for internal state.
+ -- a buffer is either in this.pool or in this.allocated
+ -- transition between this.pool <==> this.allocated must be atomic
+ -- only one buffer allocated for a given blockNumber
+ */
+
+
+ /**
+ * Underlying bounded resource pool.
+ */
+ private BoundedResourcePool<ByteBuffer> pool;
+
+ /**
+ * Allows associating metadata to each buffer in the pool.
+ */
+ private Map<BufferData, ByteBuffer> allocated;
+
+ /**
+ * Prefetching stats.
+ */
+ private PrefetchingStatistics prefetchingStatistics;
+
+ /**
+ * Initializes a new instance of the {@code BufferPool} class.
+ * @param size number of buffers in this pool.
+ * @param bufferSize size in bytes of each buffer.
+ * @param prefetchingStatistics statistics for this stream.
+ * @throws IllegalArgumentException if size is zero or negative.
+ * @throws IllegalArgumentException if bufferSize is zero or negative.
+ */
+ public BufferPool(int size,
+ int bufferSize,
+ PrefetchingStatistics prefetchingStatistics) {
+ Validate.checkPositiveInteger(size, "size");
+ Validate.checkPositiveInteger(bufferSize, "bufferSize");
+
+ this.size = size;
+ this.bufferSize = bufferSize;
+ this.allocated = new IdentityHashMap<>();
+ this.prefetchingStatistics = requireNonNull(prefetchingStatistics);
+ this.pool = new BoundedResourcePool<ByteBuffer>(size) {
+ @Override
+ public ByteBuffer createNew() {
+ ByteBuffer buffer = ByteBuffer.allocate(bufferSize);
+ prefetchingStatistics.memoryAllocated(bufferSize);
+ return buffer;
+ }
+ };
+ }
+
+ /**
+ * Gets a list of all blocks in this pool.
+ * @return a list of all blocks in this pool.
+ */
+ public List<BufferData> getAll() {
+ synchronized (allocated) {
+ return Collections.unmodifiableList(new ArrayList<>(allocated.keySet()));
+ }
+ }
+
+ /**
+ * Acquires a {@code ByteBuffer}, blocking if necessary until one becomes available.
+ * @param blockNumber the id of the block to acquire.
+ * @return the acquired block's {@code BufferData}.
+ */
+ public synchronized BufferData acquire(int blockNumber) {
+ BufferData data;
+ final int maxRetryDelayMs = 600 * 1000;
+ final int statusUpdateDelayMs = 120 * 1000;
+ Retryer retryer = new Retryer(10, maxRetryDelayMs, statusUpdateDelayMs);
+
+ do {
+ if (retryer.updateStatus()) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("waiting to acquire block: {}", blockNumber);
+ LOG.debug("state = {}", this);
+ }
+ releaseReadyBlock(blockNumber);
+ }
+ data = tryAcquire(blockNumber);
+ }
+ while ((data == null) && retryer.continueRetry());
+
+ if (data != null) {
+ return data;
+ } else {
+ String message =
+ String.format("Wait failed for acquire(%d)", blockNumber);
+ throw new IllegalStateException(message);
+ }
+ }
+
+ /**
+ * Acquires a buffer if one is immediately available. Otherwise returns null.
+ * @param blockNumber the id of the block to try acquire.
+ * @return the acquired block's {@code BufferData} or null.
+ */
+ public synchronized BufferData tryAcquire(int blockNumber) {
+ return acquireHelper(blockNumber, false);
+ }
+
+ private synchronized BufferData acquireHelper(int blockNumber,
+ boolean canBlock) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ releaseDoneBlocks();
+
+ BufferData data = find(blockNumber);
+ if (data != null) {
+ return data;
+ }
+
+ ByteBuffer buffer = canBlock ? pool.acquire() : pool.tryAcquire();
+ if (buffer == null) {
+ return null;
+ }
+
+ buffer.clear();
+ data = new BufferData(blockNumber, buffer.duplicate());
+
+ synchronized (allocated) {
+ checkState(find(blockNumber) == null, "buffer data already exists");
+
+ allocated.put(data, buffer);
+ }
+
+ return data;
+ }
+
+ /**
+ * Releases resources for any blocks marked as 'done'.
+ */
+ private synchronized void releaseDoneBlocks() {
+ for (BufferData data : getAll()) {
+ if (data.stateEqualsOneOf(BufferData.State.DONE)) {
+ release(data);
+ }
+ }
+ }
+
+ /**
+ * If no blocks were released after calling releaseDoneBlocks() a few times,
+ * we may end up waiting forever. To avoid that situation, we try releasing
+ * a 'ready' block farthest away from the given block.
+ */
+ private synchronized void releaseReadyBlock(int blockNumber) {
+ BufferData releaseTarget = null;
+ for (BufferData data : getAll()) {
+ if (data.stateEqualsOneOf(BufferData.State.READY)) {
+ if (releaseTarget == null) {
+ releaseTarget = data;
+ } else {
+ if (distance(data, blockNumber) > distance(releaseTarget,
+ blockNumber)) {
+ releaseTarget = data;
+ }
+ }
+ }
+ }
+
+ if (releaseTarget != null) {
+ LOG.warn("releasing 'ready' block: {}", releaseTarget);
+ releaseTarget.setDone();
+ }
+ }
+
+ private int distance(BufferData data, int blockNumber) {
+ return Math.abs(data.getBlockNumber() - blockNumber);
+ }
+
+ /**
+ * Releases a previously acquired resource.
+ * @param data the {@code BufferData} instance to release.
+ * @throws IllegalArgumentException if data is null.
+ * @throws IllegalArgumentException if data cannot be released due to its state.
+ */
+ public synchronized void release(BufferData data) {
+ checkNotNull(data, "data");
+
+ synchronized (data) {
+ checkArgument(
+ canRelease(data),
+ String.format("Unable to release buffer: %s", data));
+
+ ByteBuffer buffer = allocated.get(data);
+ if (buffer == null) {
+ // Likely released earlier.
+ return;
+ }
+ buffer.clear();
+ pool.release(buffer);
+ allocated.remove(data);
+ }
+
+ releaseDoneBlocks();
+ }
+
+ @Override
+ public synchronized void close() {
+ for (BufferData data : getAll()) {
+ Future<Void> actionFuture = data.getActionFuture();
+ if (actionFuture != null) {
+ actionFuture.cancel(true);
+ }
+ }
+
+ int currentPoolSize = pool.numCreated();
+
+ pool.close();
+ pool = null;
+
+ allocated.clear();
+ allocated = null;
+
+ prefetchingStatistics.memoryFreed(currentPoolSize * bufferSize);
+ }
+
+ // For debugging purposes.
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(pool.toString());
+ sb.append("\n");
+ List<BufferData> allData = new ArrayList<>(getAll());
+ Collections.sort(allData,
+ (d1, d2) -> d1.getBlockNumber() - d2.getBlockNumber());
+ for (BufferData data : allData) {
+ sb.append(data.toString());
+ sb.append("\n");
+ }
+
+ return sb.toString();
+ }
+
+ // Number of ByteBuffers created so far.
+ public synchronized int numCreated() {
+ return pool.numCreated();
+ }
+
+ // Number of ByteBuffers available to be acquired.
+ public synchronized int numAvailable() {
+ releaseDoneBlocks();
+ return pool.numAvailable();
+ }
+
+ private BufferData find(int blockNumber) {
+ synchronized (allocated) {
+ for (BufferData data : allocated.keySet()) {
+ if ((data.getBlockNumber() == blockNumber)
+ && !data.stateEqualsOneOf(BufferData.State.DONE)) {
+ return data;
+ }
+ }
+ }
+
+ return null;
+ }
+
+ private boolean canRelease(BufferData data) {
+ return data.stateEqualsOneOf(
+ BufferData.State.DONE,
+ BufferData.State.READY);
+ }
+}
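A minimal sketch of the acquire/release cycle that CachingBlockManager drives against this pool, using the EmptyPrefetchingStatistics sink added later in this patch; the pool and buffer sizes are arbitrary.

    import org.apache.hadoop.fs.impl.prefetch.BufferData;
    import org.apache.hadoop.fs.impl.prefetch.BufferPool;
    import org.apache.hadoop.fs.impl.prefetch.EmptyPrefetchingStatistics;

    public class BufferPoolExample {
      public static void main(String[] args) {
        BufferPool pool =
            new BufferPool(2, 1024, EmptyPrefetchingStatistics.getInstance());

        BufferData block0 = pool.acquire(0);     // allocates a buffer for block 0
        BufferData again = pool.acquire(0);      // at most one live buffer per block number
        System.out.println(block0 == again);     // true

        block0.setReady(BufferData.State.BLANK); // only READY or DONE blocks may be released
        pool.release(block0);

        System.out.println(pool.numCreated());   // 1
        System.out.println(pool.numAvailable()); // 2: one idle buffer plus one never created
        pool.close();
      }
    }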
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java
new file mode 100644
index 0000000000000..31084c7bf2648
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/CachingBlockManager.java
@@ -0,0 +1,638 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.statistics.DurationTracker;
+
+import static java.util.Objects.requireNonNull;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
+
+/**
+ * Provides read access to the underlying file one block at a time.
+ * Improves read performance by prefetching and locally caching blocks.
+ */
+public abstract class CachingBlockManager extends BlockManager {
+ private static final Logger LOG = LoggerFactory.getLogger(CachingBlockManager.class);
+ private static final int TIMEOUT_MINUTES = 60;
+
+ /**
+ * Asynchronous tasks are performed in this pool.
+ */
+ private final ExecutorServiceFuturePool futurePool;
+
+ /**
+ * Pool of shared ByteBuffer instances.
+ */
+ private BufferPool bufferPool;
+
+ /**
+ * Size of the in-memory cache in terms of number of blocks.
+ * Total memory consumption is up to bufferPoolSize * blockSize.
+ */
+ private final int bufferPoolSize;
+
+ /**
+ * Local block cache.
+ */
+ private BlockCache cache;
+
+ /**
+ * Error counts. For testing purposes.
+ */
+ private final AtomicInteger numCachingErrors;
+ private final AtomicInteger numReadErrors;
+
+ /**
+ * Operations performed by this block manager.
+ */
+ private final BlockOperations ops;
+
+ private boolean closed;
+
+ /**
+ * If a single caching operation takes more than this time (in seconds),
+ * we disable caching to prevent further perf degradation due to caching.
+ */
+ private static final int SLOW_CACHING_THRESHOLD = 5;
+
+ /**
+ * Once set to true, any further caching requests will be ignored.
+ */
+ private final AtomicBoolean cachingDisabled;
+
+ private final PrefetchingStatistics prefetchingStatistics;
+
+ /**
+ * Constructs an instance of a {@code CachingBlockManager}.
+ *
+ * @param futurePool asynchronous tasks are performed in this pool.
+ * @param blockData information about each block of the underlying file.
+ * @param bufferPoolSize size of the in-memory cache in terms of number of blocks.
+ * @param prefetchingStatistics statistics for this stream.
+ *
+ * @throws IllegalArgumentException if bufferPoolSize is zero or negative.
+ */
+ public CachingBlockManager(
+ ExecutorServiceFuturePool futurePool,
+ BlockData blockData,
+ int bufferPoolSize,
+ PrefetchingStatistics prefetchingStatistics) {
+ super(blockData);
+
+ Validate.checkPositiveInteger(bufferPoolSize, "bufferPoolSize");
+
+ this.futurePool = requireNonNull(futurePool);
+ this.bufferPoolSize = bufferPoolSize;
+ this.numCachingErrors = new AtomicInteger();
+ this.numReadErrors = new AtomicInteger();
+ this.cachingDisabled = new AtomicBoolean();
+ this.prefetchingStatistics = requireNonNull(prefetchingStatistics);
+
+ if (this.getBlockData().getFileSize() > 0) {
+ this.bufferPool = new BufferPool(bufferPoolSize, this.getBlockData().getBlockSize(),
+ this.prefetchingStatistics);
+ this.cache = this.createCache();
+ }
+
+ this.ops = new BlockOperations();
+ this.ops.setDebug(false);
+ }
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ @Override
+ public BufferData get(int blockNumber) throws IOException {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ BufferData data;
+ final int maxRetryDelayMs = bufferPoolSize * 120 * 1000;
+ final int statusUpdateDelayMs = 120 * 1000;
+ Retryer retryer = new Retryer(10, maxRetryDelayMs, statusUpdateDelayMs);
+ boolean done;
+
+ do {
+ if (closed) {
+ throw new IOException("this stream is already closed");
+ }
+
+ data = bufferPool.acquire(blockNumber);
+ done = getInternal(data);
+
+ if (retryer.updateStatus()) {
+ LOG.warn("waiting to get block: {}", blockNumber);
+ LOG.info("state = {}", this.toString());
+ }
+ }
+ while (!done && retryer.continueRetry());
+
+ if (done) {
+ return data;
+ } else {
+ String message = String.format("Wait failed for get(%d)", blockNumber);
+ throw new IllegalStateException(message);
+ }
+ }
+
+ private boolean getInternal(BufferData data) throws IOException {
+ Validate.checkNotNull(data, "data");
+
+ // Opportunistic check without locking.
+ if (data.stateEqualsOneOf(
+ BufferData.State.PREFETCHING,
+ BufferData.State.CACHING,
+ BufferData.State.DONE)) {
+ return false;
+ }
+
+ synchronized (data) {
+ // Reconfirm state after locking.
+ if (data.stateEqualsOneOf(
+ BufferData.State.PREFETCHING,
+ BufferData.State.CACHING,
+ BufferData.State.DONE)) {
+ return false;
+ }
+
+ int blockNumber = data.getBlockNumber();
+ if (data.getState() == BufferData.State.READY) {
+ BlockOperations.Operation op = ops.getPrefetched(blockNumber);
+ ops.end(op);
+ return true;
+ }
+
+ data.throwIfStateIncorrect(BufferData.State.BLANK);
+ read(data);
+ return true;
+ }
+ }
+
+ /**
+ * Releases resources allocated to the given block.
+ *
+ * @throws IllegalArgumentException if data is null.
+ */
+ @Override
+ public void release(BufferData data) {
+ if (closed) {
+ return;
+ }
+
+ Validate.checkNotNull(data, "data");
+
+ BlockOperations.Operation op = ops.release(data.getBlockNumber());
+ bufferPool.release(data);
+ ops.end(op);
+ }
+
+ @Override
+ public synchronized void close() {
+ if (closed) {
+ return;
+ }
+
+ closed = true;
+
+ final BlockOperations.Operation op = ops.close();
+
+ // Cancel any prefetches in progress.
+ cancelPrefetches();
+
+ cleanupWithLogger(LOG, cache);
+
+ ops.end(op);
+ LOG.info(ops.getSummary(false));
+
+ bufferPool.close();
+ bufferPool = null;
+ }
+
+ /**
+ * Requests optional prefetching of the given block.
+ * The block is prefetched only if we can acquire a free buffer.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ @Override
+ public void requestPrefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ if (closed) {
+ return;
+ }
+
+ // We initiate a prefetch only if we can acquire a buffer from the shared pool.
+ BufferData data = bufferPool.tryAcquire(blockNumber);
+ if (data == null) {
+ return;
+ }
+
+ // Opportunistic check without locking.
+ if (!data.stateEqualsOneOf(BufferData.State.BLANK)) {
+ // The block is ready or being prefetched/cached.
+ return;
+ }
+
+ synchronized (data) {
+ // Reconfirm state after locking.
+ if (!data.stateEqualsOneOf(BufferData.State.BLANK)) {
+ // The block is ready or being prefetched/cached.
+ return;
+ }
+
+ BlockOperations.Operation op = ops.requestPrefetch(blockNumber);
+ PrefetchTask prefetchTask = new PrefetchTask(data, this, Instant.now());
+ Future<Void> prefetchFuture = futurePool.executeFunction(prefetchTask);
+ data.setPrefetch(prefetchFuture);
+ ops.end(op);
+ }
+ }
+
+ /**
+ * Requests cancellation of any previously issued prefetch requests.
+ */
+ @Override
+ public void cancelPrefetches() {
+ BlockOperations.Operation op = ops.cancelPrefetches();
+
+ for (BufferData data : bufferPool.getAll()) {
+ // We add blocks being prefetched to the local cache so that the prefetch is not wasted.
+ if (data.stateEqualsOneOf(BufferData.State.PREFETCHING, BufferData.State.READY)) {
+ requestCaching(data);
+ }
+ }
+
+ ops.end(op);
+ }
+
+ private void read(BufferData data) throws IOException {
+ synchronized (data) {
+ readBlock(data, false, BufferData.State.BLANK);
+ }
+ }
+
+ private void prefetch(BufferData data, Instant taskQueuedStartTime) throws IOException {
+ synchronized (data) {
+ prefetchingStatistics.executorAcquired(
+ Duration.between(taskQueuedStartTime, Instant.now()));
+ readBlock(
+ data,
+ true,
+ BufferData.State.PREFETCHING,
+ BufferData.State.CACHING);
+ }
+ }
+
+ private void readBlock(BufferData data, boolean isPrefetch, BufferData.State... expectedState)
+ throws IOException {
+
+ if (closed) {
+ return;
+ }
+
+ BlockOperations.Operation op = null;
+ DurationTracker tracker = null;
+
+ synchronized (data) {
+ try {
+ if (data.stateEqualsOneOf(BufferData.State.DONE, BufferData.State.READY)) {
+ // DONE : Block was released, likely due to caching being disabled on slow perf.
+ // READY : Block was already fetched by another thread. No need to re-read.
+ return;
+ }
+
+ data.throwIfStateIncorrect(expectedState);
+ int blockNumber = data.getBlockNumber();
+
+ // Prefer reading from cache over reading from network.
+ if (cache.containsBlock(blockNumber)) {
+ op = ops.getCached(blockNumber);
+ cache.get(blockNumber, data.getBuffer());
+ data.setReady(expectedState);
+ return;
+ }
+
+ if (isPrefetch) {
+ tracker = prefetchingStatistics.prefetchOperationStarted();
+ op = ops.prefetch(data.getBlockNumber());
+ } else {
+ op = ops.getRead(data.getBlockNumber());
+ }
+
+ long offset = getBlockData().getStartOffset(data.getBlockNumber());
+ int size = getBlockData().getSize(data.getBlockNumber());
+ ByteBuffer buffer = data.getBuffer();
+ buffer.clear();
+ read(buffer, offset, size);
+ buffer.flip();
+ data.setReady(expectedState);
+ } catch (Exception e) {
+ String message = String.format("error during readBlock(%s)", data.getBlockNumber());
+ LOG.error(message, e);
+
+ if (isPrefetch && tracker != null) {
+ tracker.failed();
+ }
+
+ numReadErrors.incrementAndGet();
+ data.setDone();
+ throw e;
+ } finally {
+ if (op != null) {
+ ops.end(op);
+ }
+
+ if (isPrefetch) {
+ prefetchingStatistics.prefetchOperationCompleted();
+ if (tracker != null) {
+ tracker.close();
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Read task that is submitted to the future pool.
+ */
+ private static class PrefetchTask implements Supplier<Void> {
+ private final BufferData data;
+ private final CachingBlockManager blockManager;
+ private final Instant taskQueuedStartTime;
+
+ PrefetchTask(BufferData data, CachingBlockManager blockManager, Instant taskQueuedStartTime) {
+ this.data = data;
+ this.blockManager = blockManager;
+ this.taskQueuedStartTime = taskQueuedStartTime;
+ }
+
+ @Override
+ public Void get() {
+ try {
+ blockManager.prefetch(data, taskQueuedStartTime);
+ } catch (Exception e) {
+ LOG.error("error during prefetch", e);
+ }
+ return null;
+ }
+ }
+
+ private static final BufferData.State[] EXPECTED_STATE_AT_CACHING =
+ new BufferData.State[] {
+ BufferData.State.PREFETCHING, BufferData.State.READY
+ };
+
+ /**
+ * Requests that the given block should be copied to the local cache.
+ * The block must not be accessed by the caller after calling this method
+ * because it will be released asynchronously relative to the caller.
+ *
+ * @throws IllegalArgumentException if data is null.
+ */
+ @Override
+ public void requestCaching(BufferData data) {
+ if (closed) {
+ return;
+ }
+
+ if (cachingDisabled.get()) {
+ data.setDone();
+ return;
+ }
+
+ Validate.checkNotNull(data, "data");
+
+ // Opportunistic check without locking.
+ if (!data.stateEqualsOneOf(EXPECTED_STATE_AT_CACHING)) {
+ return;
+ }
+
+ synchronized (data) {
+ // Reconfirm state after locking.
+ if (!data.stateEqualsOneOf(EXPECTED_STATE_AT_CACHING)) {
+ return;
+ }
+
+ if (cache.containsBlock(data.getBlockNumber())) {
+ data.setDone();
+ return;
+ }
+
+ BufferData.State state = data.getState();
+
+ BlockOperations.Operation op = ops.requestCaching(data.getBlockNumber());
+ Future<Void> blockFuture;
+ if (state == BufferData.State.PREFETCHING) {
+ blockFuture = data.getActionFuture();
+ } else {
+ CompletableFuture<Void> cf = new CompletableFuture<>();
+ cf.complete(null);
+ blockFuture = cf;
+ }
+
+ CachePutTask task = new CachePutTask(data, blockFuture, this, Instant.now());
+ Future<Void> actionFuture = futurePool.executeFunction(task);
+ data.setCaching(actionFuture);
+ ops.end(op);
+ }
+ }
+
+ private void addToCacheAndRelease(BufferData data, Future<Void> blockFuture,
+ Instant taskQueuedStartTime) {
+ prefetchingStatistics.executorAcquired(
+ Duration.between(taskQueuedStartTime, Instant.now()));
+
+ if (closed) {
+ return;
+ }
+
+ if (cachingDisabled.get()) {
+ data.setDone();
+ return;
+ }
+
+ try {
+ blockFuture.get(TIMEOUT_MINUTES, TimeUnit.MINUTES);
+ if (data.stateEqualsOneOf(BufferData.State.DONE)) {
+ // There was an error during prefetch.
+ return;
+ }
+ } catch (Exception e) {
+ LOG.error("error waiting on blockFuture: {}", data, e);
+ data.setDone();
+ return;
+ }
+
+ if (cachingDisabled.get()) {
+ data.setDone();
+ return;
+ }
+
+ BlockOperations.Operation op = null;
+
+ synchronized (data) {
+ try {
+ if (data.stateEqualsOneOf(BufferData.State.DONE)) {
+ return;
+ }
+
+ if (cache.containsBlock(data.getBlockNumber())) {
+ data.setDone();
+ return;
+ }
+
+ op = ops.addToCache(data.getBlockNumber());
+ ByteBuffer buffer = data.getBuffer().duplicate();
+ buffer.rewind();
+ cachePut(data.getBlockNumber(), buffer);
+ data.setDone();
+ } catch (Exception e) {
+ numCachingErrors.incrementAndGet();
+ String message = String.format("error adding block to cache after wait: %s", data);
+ LOG.error(message, e);
+ data.setDone();
+ }
+
+ if (op != null) {
+ BlockOperations.End endOp = (BlockOperations.End) ops.end(op);
+ if (endOp.duration() > SLOW_CACHING_THRESHOLD) {
+ if (!cachingDisabled.getAndSet(true)) {
+ String message = String.format(
+ "Caching disabled because of slow operation (%.1f sec)", endOp.duration());
+ LOG.warn(message);
+ }
+ }
+ }
+ }
+ }
+
+ protected BlockCache createCache() {
+ return new SingleFilePerBlockCache(prefetchingStatistics);
+ }
+
+ protected void cachePut(int blockNumber, ByteBuffer buffer) throws IOException {
+ if (closed) {
+ return;
+ }
+
+ cache.put(blockNumber, buffer);
+ }
+
+ private static class CachePutTask implements Supplier<Void> {
+ private final BufferData data;
+
+ // Block being asynchronously fetched.
+ private final Future<Void> blockFuture;
+
+ // Block manager that manages this block.
+ private final CachingBlockManager blockManager;
+
+ private final Instant taskQueuedStartTime;
+
+ CachePutTask(
+ BufferData data,
+ Future<Void> blockFuture,
+ CachingBlockManager blockManager,
+ Instant taskQueuedStartTime) {
+ this.data = data;
+ this.blockFuture = blockFuture;
+ this.blockManager = blockManager;
+ this.taskQueuedStartTime = taskQueuedStartTime;
+ }
+
+ @Override
+ public Void get() {
+ blockManager.addToCacheAndRelease(data, blockFuture, taskQueuedStartTime);
+ return null;
+ }
+ }
+
+ /**
+ * Number of ByteBuffers available to be acquired.
+ *
+ * @return the number of available buffers.
+ */
+ public int numAvailable() {
+ return bufferPool.numAvailable();
+ }
+
+ /**
+ * Number of caching operations completed.
+ *
+ * @return the number of cached buffers.
+ */
+ public int numCached() {
+ return cache.size();
+ }
+
+ /**
+ * Number of errors encountered when caching.
+ *
+ * @return the number of errors encountered when caching.
+ */
+ public int numCachingErrors() {
+ return numCachingErrors.get();
+ }
+
+ /**
+ * Number of errors encountered when reading.
+ *
+ * @return the number of errors encountered when reading.
+ */
+ public int numReadErrors() {
+ return numReadErrors.get();
+ }
+
+ BufferData getData(int blockNumber) {
+ return bufferPool.tryAcquire(blockNumber);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+
+ sb.append("cache(");
+ sb.append(cache.toString());
+ sb.append("); ");
+
+ sb.append("pool: ");
+ sb.append(bufferPool.toString());
+
+ return sb.toString();
+ }
+}
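The class above repeatedly applies the same concurrency pattern in getInternal(), requestPrefetch() and requestCaching(): an opportunistic, unlocked state check, then a re-check under the block's lock before transitioning. A stripped-down sketch of that pattern using BufferData directly; the PREFETCHING transition stands in for the manager's actual work and is not part of the patch.

    import java.nio.ByteBuffer;

    import org.apache.hadoop.fs.impl.prefetch.BufferData;

    public class DoubleCheckedStateExample {

      static boolean startWorkIfBlank(BufferData data) {
        // Cheap check without locking; most callers bail out here.
        if (!data.stateEqualsOneOf(BufferData.State.BLANK)) {
          return false;
        }
        synchronized (data) {
          // Re-confirm after locking; another thread may have raced ahead.
          if (!data.stateEqualsOneOf(BufferData.State.BLANK)) {
            return false;
          }
          // Lock held and the state is still BLANK: safe to transition.
          data.updateState(BufferData.State.PREFETCHING, BufferData.State.BLANK);
          return true;
        }
      }

      public static void main(String[] args) {
        BufferData data = new BufferData(0, ByteBuffer.allocate(8));
        System.out.println(startWorkIfBlank(data)); // true: BLANK -> PREFETCHING
        System.out.println(startWorkIfBlank(data)); // false: no longer BLANK
      }
    }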
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java
new file mode 100644
index 0000000000000..177ff7abab8b7
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/EmptyPrefetchingStatistics.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.time.Duration;
+
+import org.apache.hadoop.fs.statistics.DurationTracker;
+
+import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.stubDurationTracker;
+
+/**
+ * Empty implementation of the prefetching statistics interface.
+ */
+public final class EmptyPrefetchingStatistics
+ implements PrefetchingStatistics {
+
+ private static final EmptyPrefetchingStatistics
+ EMPTY_PREFETCHING_STATISTICS =
+ new EmptyPrefetchingStatistics();
+
+ private EmptyPrefetchingStatistics() {
+ }
+
+ public static EmptyPrefetchingStatistics getInstance() {
+ return EMPTY_PREFETCHING_STATISTICS;
+ }
+
+ @Override
+ public DurationTracker prefetchOperationStarted() {
+ return stubDurationTracker();
+ }
+
+ @Override
+ public void blockAddedToFileCache() {
+
+ }
+
+ @Override
+ public void blockRemovedFromFileCache() {
+
+ }
+
+ @Override
+ public void prefetchOperationCompleted() {
+
+ }
+
+ @Override
+ public void executorAcquired(Duration timeInQueue) {
+
+ }
+
+ @Override
+ public void memoryAllocated(int size) {
+
+ }
+
+ @Override
+ public void memoryFreed(int size) {
+
+ }
+}
+
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java
new file mode 100644
index 0000000000000..9ef50e50d7e5e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ExecutorServiceFuturePool.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.Locale;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Future;
+import java.util.function.Supplier;
+
+/**
+ * A FuturePool implementation backed by a java.util.concurrent.ExecutorService.
+ *
+ * If a piece of work has started, it cannot (currently) be cancelled.
+ *
+ * This class is a simplified version of com.twitter:util-core_2.11
+ * ExecutorServiceFuturePool designed to avoid depending on that Scala library.
+ * One problem with using a Scala library is that many downstream projects
+ * (eg Apache Spark) use Scala, and they might want to use a different version of Scala
+ * from the version that Hadoop chooses to use.
+ *
+ */
+public class ExecutorServiceFuturePool {
+ private ExecutorService executor;
+
+ public ExecutorServiceFuturePool(ExecutorService executor) {
+ this.executor = executor;
+ }
+
+ /**
+ * @param f function to run in future on executor pool
+ * @return future
+ * @throws java.util.concurrent.RejectedExecutionException can be thrown
+ * @throws NullPointerException if f param is null
+ */
+ public Future<Void> executeFunction(final Supplier<Void> f) {
+ return executor.submit(f::get);
+ }
+
+ /**
+ * @param r runnable to run in future on executor pool
+ * @return future
+ * @throws java.util.concurrent.RejectedExecutionException can be thrown
+ * @throws NullPointerException if r param is null
+ */
+ @SuppressWarnings("unchecked")
+ public Future<Void> executeRunnable(final Runnable r) {
+ return (Future<Void>) executor.submit(r::run);
+ }
+
+ public String toString() {
+ return String.format(Locale.ROOT, "ExecutorServiceFuturePool(executor=%s)", executor);
+ }
+}
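A small usage sketch; the executor sizing and the tasks are arbitrary stand-ins for the prefetch and cache-put suppliers that CachingBlockManager submits.

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    import org.apache.hadoop.fs.impl.prefetch.ExecutorServiceFuturePool;

    public class FuturePoolExample {
      public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newFixedThreadPool(2);
        ExecutorServiceFuturePool futurePool = new ExecutorServiceFuturePool(executor);

        // Supplier form, as used for prefetch and cache-put tasks.
        Future<Void> f1 = futurePool.executeFunction(() -> {
          System.out.println("prefetch-like task running");
          return null;
        });

        // Runnable form.
        Future<Void> f2 =
            futurePool.executeRunnable(() -> System.out.println("runnable task running"));

        f1.get();
        f2.get();
        executor.shutdown();
      }
    }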
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java
new file mode 100644
index 0000000000000..7cd3bb3de2b58
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/FilePosition.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.nio.ByteBuffer;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkState;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkWithinRange;
+
+/**
+ * Provides functionality related to tracking the position within a file.
+ *
+ * The file is accessed through an in-memory buffer. The absolute position within
+ * the file is the sum of start offset of the buffer within the file and the relative
+ * offset of the current access location within the buffer.
+ *
+ * A file is made up of equal sized blocks. The last block may be of a smaller size.
+ * The size of a buffer associated with this file is typically the same as block size.
+ */
+public final class FilePosition {
+
+ /**
+ * Holds block based information about a file.
+ */
+ private BlockData blockData;
+
+ /**
+ * Information about the buffer in use.
+ */
+ private BufferData data;
+
+ /**
+ * Provides access to the underlying file.
+ */
+ private ByteBuffer buffer;
+
+ /**
+ * Start offset of the buffer relative to the start of a file.
+ */
+ private long bufferStartOffset;
+
+ /**
+ * Offset where reading starts relative to the start of a file.
+ */
+ private long readStartOffset;
+
+ // Read stats after a seek (mostly for debugging use).
+ private int numSingleByteReads;
+
+ private int numBytesRead;
+
+ private int numBufferReads;
+
+ /**
+ * Constructs an instance of {@link FilePosition}.
+ *
+ * @param fileSize size of the associated file.
+ * @param blockSize size of each block within the file.
+ *
+ * @throws IllegalArgumentException if fileSize is negative.
+ * @throws IllegalArgumentException if blockSize is zero or negative.
+ */
+ public FilePosition(long fileSize, int blockSize) {
+ checkNotNegative(fileSize, "fileSize");
+ if (fileSize == 0) {
+ checkNotNegative(blockSize, "blockSize");
+ } else {
+ checkPositiveInteger(blockSize, "blockSize");
+ }
+
+ this.blockData = new BlockData(fileSize, blockSize);
+
+ // The position is valid only when a valid buffer is associated with this file.
+ this.invalidate();
+ }
+
+ /**
+ * Associates a buffer with this file.
+ *
+ * @param bufferData the buffer associated with this file.
+ * @param startOffset Start offset of the buffer relative to the start of a file.
+ * @param readOffset Offset where reading starts relative to the start of a file.
+ *
+ * @throws IllegalArgumentException if bufferData is null.
+ * @throws IllegalArgumentException if startOffset is negative.
+ * @throws IllegalArgumentException if readOffset is negative.
+ * @throws IllegalArgumentException if readOffset is outside the range [startOffset, buffer end].
+ */
+ public void setData(BufferData bufferData,
+ long startOffset,
+ long readOffset) {
+ checkNotNull(bufferData, "bufferData");
+ checkNotNegative(startOffset, "startOffset");
+ checkNotNegative(readOffset, "readOffset");
+ checkWithinRange(
+ readOffset,
+ "readOffset",
+ startOffset,
+ startOffset + bufferData.getBuffer().limit() - 1);
+
+ data = bufferData;
+ buffer = bufferData.getBuffer().duplicate();
+ bufferStartOffset = startOffset;
+ readStartOffset = readOffset;
+ setAbsolute(readOffset);
+
+ resetReadStats();
+ }
+
+ public ByteBuffer buffer() {
+ throwIfInvalidBuffer();
+ return buffer;
+ }
+
+ public BufferData data() {
+ throwIfInvalidBuffer();
+ return data;
+ }
+
+ /**
+ * Gets the current absolute position within this file.
+ *
+ * @return the current absolute position within this file.
+ */
+ public long absolute() {
+ throwIfInvalidBuffer();
+ return bufferStartOffset + relative();
+ }
+
+ /**
+ * If the given {@code pos} lies within the current buffer, updates the current position to
+ * the specified value and returns true; otherwise returns false without changing the position.
+ *
+ * @param pos the absolute position to change the current position to if possible.
+ * @return true if the given current position was updated, false otherwise.
+ */
+ public boolean setAbsolute(long pos) {
+ if (isValid() && isWithinCurrentBuffer(pos)) {
+ int relativePos = (int) (pos - bufferStartOffset);
+ buffer.position(relativePos);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ /**
+ * Gets the current position within this file relative to the start of the associated buffer.
+ *
+ * @return the current position within this file relative to the start of the associated buffer.
+ */
+ public int relative() {
+ throwIfInvalidBuffer();
+ return buffer.position();
+ }
+
+ /**
+ * Determines whether the given absolute position lies within the current buffer.
+ *
+ * @param pos the position to check.
+ * @return true if the given absolute position lies within the current buffer, false otherwise.
+ */
+ public boolean isWithinCurrentBuffer(long pos) {
+ throwIfInvalidBuffer();
+ long bufferEndOffset = bufferStartOffset + buffer.limit() - 1;
+ return (pos >= bufferStartOffset) && (pos <= bufferEndOffset);
+ }
+
+ /**
+ * Gets the id of the current block.
+ *
+ * @return the id of the current block.
+ */
+ public int blockNumber() {
+ throwIfInvalidBuffer();
+ return blockData.getBlockNumber(bufferStartOffset);
+ }
+
+ /**
+ * Determines whether the current block is the last block in this file.
+ *
+ * @return true if the current block is the last block in this file, false otherwise.
+ */
+ public boolean isLastBlock() {
+ return blockData.isLastBlock(blockNumber());
+ }
+
+ /**
+ * Determines if the current position is valid.
+ *
+ * @return true if the current position is valid, false otherwise.
+ */
+ public boolean isValid() {
+ return buffer != null;
+ }
+
+ /**
+ * Marks the current position as invalid.
+ */
+ public void invalidate() {
+ buffer = null;
+ bufferStartOffset = -1;
+ data = null;
+ }
+
+ /**
+ * Gets the start of the current block's absolute offset.
+ *
+ * @return the start of the current block's absolute offset.
+ */
+ public long bufferStartOffset() {
+ throwIfInvalidBuffer();
+ return bufferStartOffset;
+ }
+
+ /**
+ * Determines whether the current buffer has been fully read.
+ *
+ * @return true if the current buffer has been fully read, false otherwise.
+ */
+ public boolean bufferFullyRead() {
+ throwIfInvalidBuffer();
+ return (bufferStartOffset == readStartOffset)
+ && (relative() == buffer.limit())
+ && (numBytesRead == buffer.limit());
+ }
+
+ public void incrementBytesRead(int n) {
+ numBytesRead += n;
+ if (n == 1) {
+ numSingleByteReads++;
+ } else {
+ numBufferReads++;
+ }
+ }
+
+ public int numBytesRead() {
+ return numBytesRead;
+ }
+
+ public int numSingleByteReads() {
+ return numSingleByteReads;
+ }
+
+ public int numBufferReads() {
+ return numBufferReads;
+ }
+
+ private void resetReadStats() {
+ numBytesRead = 0;
+ numSingleByteReads = 0;
+ numBufferReads = 0;
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ if (buffer == null) {
+ sb.append("currentBuffer = null");
+ } else {
+ int pos = buffer.position();
+ int val;
+ if (pos >= buffer.limit()) {
+ val = -1;
+ } else {
+ val = buffer.get(pos);
+ }
+ String currentBufferState =
+ String.format("%d at pos: %d, lim: %d", val, pos, buffer.limit());
+ sb.append(String.format(
+ "block: %d, pos: %d (CBuf: %s)%n",
+ blockNumber(), absolute(),
+ currentBufferState));
+ sb.append("\n");
+ }
+ return sb.toString();
+ }
+
+ private void throwIfInvalidBuffer() {
+ checkState(buffer != null, "'buffer' must not be null");
+ }
+}
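A sketch of position tracking with a 20-byte file split into 8-byte blocks: the buffer for block 1 covers file offsets 8..15 and reading starts at offset 10. The sizes are arbitrary and chosen only to make the printed values easy to verify by hand.

    import java.nio.ByteBuffer;

    import org.apache.hadoop.fs.impl.prefetch.BufferData;
    import org.apache.hadoop.fs.impl.prefetch.FilePosition;

    public class FilePositionExample {
      public static void main(String[] args) {
        int blockSize = 8;
        long fileSize = 20;                                // blocks of 8, 8 and 4 bytes
        FilePosition pos = new FilePosition(fileSize, blockSize);
        System.out.println(pos.isValid());                 // false: no buffer associated yet

        // Associate the buffer holding block 1 (offsets 8..15); reading starts at offset 10.
        BufferData block1 = new BufferData(1, ByteBuffer.allocate(blockSize));
        pos.setData(block1, 8, 10);

        System.out.println(pos.blockNumber());             // 1
        System.out.println(pos.absolute());                // 10
        System.out.println(pos.relative());                // 2
        System.out.println(pos.isWithinCurrentBuffer(15)); // true
        System.out.println(pos.isWithinCurrentBuffer(16)); // false: that offset is in block 2

        pos.invalidate();
        System.out.println(pos.isValid());                 // false again
      }
    }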
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java
new file mode 100644
index 0000000000000..9ce2dec5889f1
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/PrefetchingStatistics.java
@@ -0,0 +1,67 @@
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.time.Duration;
+
+import org.apache.hadoop.fs.statistics.DurationTracker;
+import org.apache.hadoop.fs.statistics.IOStatisticsSource;
+
+public interface PrefetchingStatistics extends IOStatisticsSource {
+
+ /**
+ * A prefetch operation has started.
+ * @return duration tracker
+ */
+ DurationTracker prefetchOperationStarted();
+
+ /**
+ * A block has been saved to the file cache.
+ */
+ void blockAddedToFileCache();
+
+ /**
+ * A block has been removed from the file cache.
+ */
+ void blockRemovedFromFileCache();
+
+ /**
+ * A prefetch operation has completed.
+ */
+ void prefetchOperationCompleted();
+
+ /**
+ * An executor has been acquired, either for prefetching or caching.
+ * @param timeInQueue time taken to acquire an executor.
+ */
+ void executorAcquired(Duration timeInQueue);
+
+ /**
+ * A new buffer has been added to the buffer pool.
+ * @param size size of the new buffer
+ */
+ void memoryAllocated(int size);
+
+ /**
+ * Previously allocated memory has been freed.
+ * @param size size of memory freed.
+ */
+ void memoryFreed(int size);
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java
new file mode 100644
index 0000000000000..77e00972d08c3
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/ResourcePool.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+
+/**
+ * Manages a fixed pool of resources.
+ *
+ * Avoids creating a new resource if a previously created instance is already available.
+ */
+ public abstract class ResourcePool<T> implements Closeable {
+
+ /**
+ * Acquires a resource blocking if necessary until one becomes available.
+ *
+ * @return the acquired resource instance.
+ */
+ public abstract T acquire();
+
+ /**
+ * Acquires a resource if one is immediately available; otherwise returns null.
+ *
+ * @return the acquired resource instance (if immediately available) or null.
+ */
+ public abstract T tryAcquire();
+
+ /**
+ * Releases a previously acquired resource.
+ *
+ * @param item the resource to release.
+ */
+ public abstract void release(T item);
+
+ @Override
+ public void close() {
+ }
+
+ /**
+ * Derived classes may implement a way to cleanup each item.
+ *
+ * @param item the resource to close.
+ */
+ protected void close(T item) {
+ // Do nothing in this class. Allow overriding classes to take any cleanup action.
+ }
+
+ /**
+ * Derived classes must implement a way to create an instance of a resource.
+ *
+ * @return the created instance.
+ */
+ protected abstract T createNew();
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java
new file mode 100644
index 0000000000000..84c17ef9dde8a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Retryer.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkGreater;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger;
+
+/**
+ * Provides retry related functionality.
+ */
+public class Retryer {
+
+ /* Maximum amount of delay (in ms) before retry fails. */
+ private int maxDelay;
+
+ /* Per retry delay (in ms). */
+ private int perRetryDelay;
+
+ /**
+ * The time interval (in ms) at which status update would be made.
+ */
+ private int statusUpdateInterval;
+
+ /* Current delay. */
+ private int delay;
+
+ /**
+ * Initializes a new instance of the {@code Retryer} class.
+ *
+ * @param perRetryDelay per retry delay (in ms).
+ * @param maxDelay maximum amount of delay (in ms) before retry fails.
+ * @param statusUpdateInterval time interval (in ms) at which status update would be made.
+ *
+ * @throws IllegalArgumentException if perRetryDelay is zero or negative.
+ * @throws IllegalArgumentException if maxDelay is less than or equal to perRetryDelay.
+ * @throws IllegalArgumentException if statusUpdateInterval is zero or negative.
+ */
+ public Retryer(int perRetryDelay, int maxDelay, int statusUpdateInterval) {
+ checkPositiveInteger(perRetryDelay, "perRetryDelay");
+ checkGreater(maxDelay, "maxDelay", perRetryDelay, "perRetryDelay");
+ checkPositiveInteger(statusUpdateInterval, "statusUpdateInterval");
+
+ this.perRetryDelay = perRetryDelay;
+ this.maxDelay = maxDelay;
+ this.statusUpdateInterval = statusUpdateInterval;
+ }
+
+ /**
+ * Returns true if retrying should continue, false otherwise.
+ *
+ * @return true if the caller should retry, false otherwise.
+ */
+ public boolean continueRetry() {
+ if (this.delay >= this.maxDelay) {
+ return false;
+ }
+
+ try {
+ Thread.sleep(this.perRetryDelay);
+ } catch (InterruptedException e) {
+ // Ignore the exception as required by the semantics of this class.
+ }
+
+ this.delay += this.perRetryDelay;
+ return true;
+ }
+
+ /**
+ * Returns true if status update interval has been reached.
+ *
+ * @return true if status update interval has been reached.
+ */
+ public boolean updateStatus() {
+ return (this.delay > 0) && this.delay % this.statusUpdateInterval == 0;
+ }
+}
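
The retry loop implied by continueRetry() and updateStatus() looks roughly like the
sketch below. It is illustrative only: blockIsReady() and logProgress() are hypothetical
helpers standing in for whatever work the caller is waiting on.

    // 10 ms per retry, a 1 s overall budget, and a status update every 200 ms of delay.
    Retryer retryer = new Retryer(10, 1000, 200);
    boolean ready = blockIsReady();
    while (!ready && retryer.continueRetry()) {   // sleeps 10 ms and accumulates delay
      ready = blockIsReady();
      if (retryer.updateStatus()) {               // true once per 200 ms of accumulated delay
        logProgress();
      }
    }
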
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java
new file mode 100644
index 0000000000000..c84335a763e87
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/SingleFilePerBlockCache.java
@@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.WritableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.nio.file.attribute.FileAttribute;
+import java.nio.file.attribute.PosixFilePermission;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+
+/**
+ * Provides functionality necessary for caching blocks of data read from FileSystem.
+ * Each cache block is stored on the local disk as a separate file.
+ */
+public class SingleFilePerBlockCache implements BlockCache {
+ private static final Logger LOG = LoggerFactory.getLogger(SingleFilePerBlockCache.class);
+
+ /**
+ * Blocks stored in this cache.
+ */
+ private final Map<Integer, Entry> blocks = new ConcurrentHashMap<>();
+
+ /**
+ * Number of times a block was read from this cache.
+ * Used for determining cache utilization factor.
+ */
+ private int numGets = 0;
+
+ private boolean closed;
+
+ private final PrefetchingStatistics prefetchingStatistics;
+
+ /**
+ * Cache entry.
+ * Each block is stored as a separate file.
+ */
+ private static final class Entry {
+ private final int blockNumber;
+ private final Path path;
+ private final int size;
+ private final long checksum;
+
+ Entry(int blockNumber, Path path, int size, long checksum) {
+ this.blockNumber = blockNumber;
+ this.path = path;
+ this.size = size;
+ this.checksum = checksum;
+ }
+
+ @Override
+ public String toString() {
+ return String.format(
+ "([%03d] %s: size = %d, checksum = %d)",
+ blockNumber, path, size, checksum);
+ }
+ }
+
+ /**
+ * Constructs an instance of a {@code SingleFilePerBlockCache}.
+ *
+ * @param prefetchingStatistics statistics for this stream.
+ */
+ public SingleFilePerBlockCache(PrefetchingStatistics prefetchingStatistics) {
+ this.prefetchingStatistics = requireNonNull(prefetchingStatistics);
+ }
+
+ /**
+ * Indicates whether the given block is in this cache.
+ */
+ @Override
+ public boolean containsBlock(int blockNumber) {
+ return blocks.containsKey(blockNumber);
+ }
+
+ /**
+ * Gets the blocks in this cache.
+ */
+ @Override
+ public Iterable<Integer> blocks() {
+ return Collections.unmodifiableList(new ArrayList<>(blocks.keySet()));
+ }
+
+ /**
+ * Gets the number of blocks in this cache.
+ */
+ @Override
+ public int size() {
+ return blocks.size();
+ }
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * @throws IllegalArgumentException if buffer is null.
+ */
+ @Override
+ public void get(int blockNumber, ByteBuffer buffer) throws IOException {
+ if (closed) {
+ return;
+ }
+
+ checkNotNull(buffer, "buffer");
+
+ Entry entry = getEntry(blockNumber);
+ buffer.clear();
+ readFile(entry.path, buffer);
+ buffer.rewind();
+
+ validateEntry(entry, buffer);
+ }
+
+ protected int readFile(Path path, ByteBuffer buffer) throws IOException {
+ int numBytesRead = 0;
+ int numBytes;
+ FileChannel channel = FileChannel.open(path, StandardOpenOption.READ);
+ while ((numBytes = channel.read(buffer)) > 0) {
+ numBytesRead += numBytes;
+ }
+ buffer.limit(buffer.position());
+ channel.close();
+ return numBytesRead;
+ }
+
+ private Entry getEntry(int blockNumber) {
+ Validate.checkNotNegative(blockNumber, "blockNumber");
+
+ Entry entry = blocks.get(blockNumber);
+ if (entry == null) {
+ throw new IllegalStateException(String.format("block %d not found in cache", blockNumber));
+ }
+ numGets++;
+ return entry;
+ }
+
+ /**
+ * Puts the given block in this cache.
+ *
+ * @throws IllegalArgumentException if buffer is null.
+ * @throws IllegalArgumentException if buffer.limit() is zero or negative.
+ */
+ @Override
+ public void put(int blockNumber, ByteBuffer buffer) throws IOException {
+ if (closed) {
+ return;
+ }
+
+ checkNotNull(buffer, "buffer");
+
+ if (blocks.containsKey(blockNumber)) {
+ Entry entry = blocks.get(blockNumber);
+ validateEntry(entry, buffer);
+ return;
+ }
+
+ Validate.checkPositiveInteger(buffer.limit(), "buffer.limit()");
+
+ Path blockFilePath = getCacheFilePath();
+ long size = Files.size(blockFilePath);
+ if (size != 0) {
+ String message =
+ String.format("[%d] temp file already has data. %s (%d)",
+ blockNumber, blockFilePath, size);
+ throw new IllegalStateException(message);
+ }
+
+ writeFile(blockFilePath, buffer);
+ prefetchingStatistics.blockAddedToFileCache();
+ long checksum = BufferData.getChecksum(buffer);
+ Entry entry = new Entry(blockNumber, blockFilePath, buffer.limit(), checksum);
+ blocks.put(blockNumber, entry);
+ }
+
+ private static final Set<? extends OpenOption> CREATE_OPTIONS =
+ EnumSet.of(StandardOpenOption.WRITE,
+ StandardOpenOption.CREATE,
+ StandardOpenOption.TRUNCATE_EXISTING);
+
+ protected void writeFile(Path path, ByteBuffer buffer) throws IOException {
+ buffer.rewind();
+ WritableByteChannel writeChannel = Files.newByteChannel(path, CREATE_OPTIONS);
+ while (buffer.hasRemaining()) {
+ writeChannel.write(buffer);
+ }
+ writeChannel.close();
+ }
+
+ protected Path getCacheFilePath() throws IOException {
+ return getTempFilePath();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (closed) {
+ return;
+ }
+
+ closed = true;
+
+ LOG.info(getStats());
+ int numFilesDeleted = 0;
+
+ for (Entry entry : blocks.values()) {
+ try {
+ Files.deleteIfExists(entry.path);
+ prefetchingStatistics.blockRemovedFromFileCache();
+ numFilesDeleted++;
+ } catch (IOException e) {
+ // Ignore while closing so that we can delete as many cache files as possible.
+ }
+ }
+
+ if (numFilesDeleted > 0) {
+ LOG.info("Deleted {} cache files", numFilesDeleted);
+ }
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("stats: ");
+ sb.append(getStats());
+ sb.append(", blocks:[");
+ sb.append(getIntList(blocks()));
+ sb.append("]");
+ return sb.toString();
+ }
+
+ private void validateEntry(Entry entry, ByteBuffer buffer) {
+ if (entry.size != buffer.limit()) {
+ String message = String.format(
+ "[%d] entry.size(%d) != buffer.limit(%d)",
+ entry.blockNumber, entry.size, buffer.limit());
+ throw new IllegalStateException(message);
+ }
+
+ long checksum = BufferData.getChecksum(buffer);
+ if (entry.checksum != checksum) {
+ String message = String.format(
+ "[%d] entry.checksum(%d) != buffer checksum(%d)",
+ entry.blockNumber, entry.checksum, checksum);
+ throw new IllegalStateException(message);
+ }
+ }
+
+ /**
+ * Produces a human-readable list of blocks for the purpose of logging.
+ * This method minimizes the length of the returned list by converting
+ * a contiguous run of blocks into a range.
+ * For example,
+ * 1, 3, 4, 5, 6, 8 becomes 1, 3~6, 8.
+ */
+ private String getIntList(Iterable<Integer> nums) {
+ List<String> numList = new ArrayList<>();
+ List<Integer> numbers = new ArrayList<Integer>();
+ for (Integer n : nums) {
+ numbers.add(n);
+ }
+ Collections.sort(numbers);
+
+ int index = 0;
+ while (index < numbers.size()) {
+ int start = numbers.get(index);
+ int prev = start;
+ int end = start;
+ while ((++index < numbers.size()) && ((end = numbers.get(index)) == prev + 1)) {
+ prev = end;
+ }
+
+ if (start == prev) {
+ numList.add(Integer.toString(start));
+ } else {
+ numList.add(String.format("%d~%d", start, prev));
+ }
+ }
+
+ return String.join(", ", numList);
+ }
+
+ private String getStats() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(String.format(
+ "#entries = %d, #gets = %d",
+ blocks.size(), numGets));
+ return sb.toString();
+ }
+
+ private static final String CACHE_FILE_PREFIX = "fs-cache-";
+
+ public static boolean isCacheSpaceAvailable(long fileSize) {
+ try {
+ Path cacheFilePath = getTempFilePath();
+ long freeSpace = new File(cacheFilePath.toString()).getUsableSpace();
+ LOG.info("fileSize = {}, freeSpace = {}", fileSize, freeSpace);
+ Files.deleteIfExists(cacheFilePath);
+ return fileSize < freeSpace;
+ } catch (IOException e) {
+ LOG.error("isCacheSpaceAvailable", e);
+ return false;
+ }
+ }
+
+ // The suffix (file extension) of each cache block file.
+ private static final String BINARY_FILE_SUFFIX = ".bin";
+
+ // File attributes attached to any temporary cache file created by this class.
+ private static final FileAttribute<Set<PosixFilePermission>> TEMP_FILE_ATTRS =
+ PosixFilePermissions.asFileAttribute(EnumSet.of(PosixFilePermission.OWNER_READ,
+ PosixFilePermission.OWNER_WRITE));
+
+ private static Path getTempFilePath() throws IOException {
+ return Files.createTempFile(
+ CACHE_FILE_PREFIX,
+ BINARY_FILE_SUFFIX,
+ TEMP_FILE_ATTRS
+ );
+ }
+}
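
The put()/get() contract above can be read as the following hedged sketch; `stats` stands
in for whatever PrefetchingStatistics implementation the owning stream supplies and is not
defined in this patch.

    BlockCache cache = new SingleFilePerBlockCache(stats);
    byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
    cache.put(0, ByteBuffer.wrap(data));            // writes a temp file, records size and checksum
    ByteBuffer readBack = ByteBuffer.allocate(data.length);
    cache.get(0, readBack);                         // re-reads the file, validates size and checksum
    cache.close();                                  // best-effort deletion of all cache files
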
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java
new file mode 100644
index 0000000000000..17a668a0d3bc3
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/Validate.java
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+
+/**
+ * A superset of the Validate class in Apache commons-lang3.
+ *
+ * It provides consistent message strings for frequently encountered checks.
+ * That simplifies callers, because they only have to supply the name of the
+ * argument that failed a check instead of the entire message.
+ */
+public final class Validate {
+
+ private Validate() {
+ }
+
+ /**
+ * Validates that the given reference argument is not null.
+ * @param obj the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNull(Object obj, String argName) {
+ checkArgument(obj != null, "'%s' must not be null.", argName);
+ }
+
+ /**
+ * Validates that the given integer argument is not zero or negative.
+ * @param value the argument value to validate
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkPositiveInteger(long value, String argName) {
+ checkArgument(value > 0, "'%s' must be a positive integer.", argName);
+ }
+
+ /**
+ * Validates that the given integer argument is not negative.
+ * @param value the argument value to validate
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNegative(long value, String argName) {
+ checkArgument(value >= 0, "'%s' must not be negative.", argName);
+ }
+
+ /**
+ * Validates that the expression (that checks a required field is present) is true.
+ * @param isPresent indicates whether the given argument is present.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkRequired(boolean isPresent, String argName) {
+ checkArgument(isPresent, "'%s' is required.", argName);
+ }
+
+ /**
+ * Validates that the expression (that checks a field is valid) is true.
+ * @param isValid indicates whether the given argument is valid.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkValid(boolean isValid, String argName) {
+ checkArgument(isValid, "'%s' is invalid.", argName);
+ }
+
+ /**
+ * Validates that the expression (that checks a field is valid) is true.
+ * @param isValid indicates whether the given argument is valid.
+ * @param argName the name of the argument being validated.
+ * @param validValues the list of values that are allowed.
+ */
+ public static void checkValid(boolean isValid,
+ String argName,
+ String validValues) {
+ checkArgument(isValid, "'%s' is invalid. Valid values are: %s.", argName,
+ validValues);
+ }
+
+ /**
+ * Validates that the given string is not null and has non-zero length.
+ * @param arg the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNullAndNotEmpty(String arg, String argName) {
+ checkNotNull(arg, argName);
+ checkArgument(
+ !arg.isEmpty(),
+ "'%s' must not be empty.",
+ argName);
+ }
+
+ /**
+ * Validates that the given array is not null and has at least one element.
+ * @param <T> the type of the array's elements.
+ * @param array the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static <T> void checkNotNullAndNotEmpty(T[] array, String argName) {
+ checkNotNull(array, argName);
+ checkNotEmpty(array.length, argName);
+ }
+
+ /**
+ * Validates that the given array is not null and has at least one element.
+ * @param array the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNullAndNotEmpty(byte[] array, String argName) {
+ checkNotNull(array, argName);
+ checkNotEmpty(array.length, argName);
+ }
+
+ /**
+ * Validates that the given array is not null and has at least one element.
+ * @param array the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNullAndNotEmpty(short[] array, String argName) {
+ checkNotNull(array, argName);
+ checkNotEmpty(array.length, argName);
+ }
+
+ /**
+ * Validates that the given array is not null and has at least one element.
+ * @param array the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNullAndNotEmpty(int[] array, String argName) {
+ checkNotNull(array, argName);
+ checkNotEmpty(array.length, argName);
+ }
+
+ /**
+ * Validates that the given array is not null and has at least one element.
+ * @param array the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkNotNullAndNotEmpty(long[] array, String argName) {
+ checkNotNull(array, argName);
+ checkNotEmpty(array.length, argName);
+ }
+
+ /**
+ * Validates that the given iterable is not null and has at least one element.
+ * @param <T> the type of the iterable's elements.
+ * @param iter the argument reference to validate.
+ * @param argName the name of the argument being validated.
+ */
+ public static <T> void checkNotNullAndNotEmpty(Iterable<T> iter,
+ String argName) {
+ checkNotNull(iter, argName);
+ int minNumElements = iter.iterator().hasNext() ? 1 : 0;
+ checkNotEmpty(minNumElements, argName);
+ }
+
+ /**
+ * Validates that the given collection is not null and has exactly the expected number of elements.
+ * @param <T> the type of the collection's elements.
+ * @param collection the argument reference to validate.
+ * @param numElements the expected number of elements in the collection.
+ * @param argName the name of the argument being validated.
+ */
+ public static <T> void checkNotNullAndNumberOfElements(
+ Collection<T> collection, int numElements, String argName) {
+ checkNotNull(collection, argName);
+ checkArgument(
+ collection.size() == numElements,
+ "Number of elements in '%s' must be exactly %s, %s given.",
+ argName,
+ numElements,
+ collection.size()
+ );
+ }
+
+ /**
+ * Validates that the given two values are equal.
+ * @param value1 the first value to check.
+ * @param value1Name the name of the first argument.
+ * @param value2 the second value to check.
+ * @param value2Name the name of the second argument.
+ */
+ public static void checkValuesEqual(
+ long value1,
+ String value1Name,
+ long value2,
+ String value2Name) {
+ checkArgument(
+ value1 == value2,
+ "'%s' (%s) must equal '%s' (%s).",
+ value1Name,
+ value1,
+ value2Name,
+ value2);
+ }
+
+ /**
+ * Validates that the first value is an integer multiple of the second value.
+ * @param value1 the first value to check.
+ * @param value1Name the name of the first argument.
+ * @param value2 the second value to check.
+ * @param value2Name the name of the second argument.
+ */
+ public static void checkIntegerMultiple(
+ long value1,
+ String value1Name,
+ long value2,
+ String value2Name) {
+ checkArgument(
+ (value1 % value2) == 0,
+ "'%s' (%s) must be an integer multiple of '%s' (%s).",
+ value1Name,
+ value1,
+ value2Name,
+ value2);
+ }
+
+ /**
+ * Validates that the first value is greater than the second value.
+ * @param value1 the first value to check.
+ * @param value1Name the name of the first argument.
+ * @param value2 the second value to check.
+ * @param value2Name the name of the second argument.
+ */
+ public static void checkGreater(
+ long value1,
+ String value1Name,
+ long value2,
+ String value2Name) {
+ checkArgument(
+ value1 > value2,
+ "'%s' (%s) must be greater than '%s' (%s).",
+ value1Name,
+ value1,
+ value2Name,
+ value2);
+ }
+
+ /**
+ * Validates that the first value is greater than or equal to the second value.
+ * @param value1 the first value to check.
+ * @param value1Name the name of the first argument.
+ * @param value2 the second value to check.
+ * @param value2Name the name of the second argument.
+ */
+ public static void checkGreaterOrEqual(
+ long value1,
+ String value1Name,
+ long value2,
+ String value2Name) {
+ checkArgument(
+ value1 >= value2,
+ "'%s' (%s) must be greater than or equal to '%s' (%s).",
+ value1Name,
+ value1,
+ value2Name,
+ value2);
+ }
+
+ /**
+ * Validates that the first value is less than or equal to the second value.
+ * @param value1 the first value to check.
+ * @param value1Name the name of the first argument.
+ * @param value2 the second value to check.
+ * @param value2Name the name of the second argument.
+ */
+ public static void checkLessOrEqual(
+ long value1,
+ String value1Name,
+ long value2,
+ String value2Name) {
+ checkArgument(
+ value1 <= value2,
+ "'%s' (%s) must be less than or equal to '%s' (%s).",
+ value1Name,
+ value1,
+ value2Name,
+ value2);
+ }
+
+ /**
+ * Validates that the given value is within the given range of values.
+ * @param value the value to check.
+ * @param valueName the name of the argument.
+ * @param minValueInclusive inclusive lower limit for the value.
+ * @param maxValueInclusive inclusive upper limit for the value.
+ */
+ public static void checkWithinRange(
+ long value,
+ String valueName,
+ long minValueInclusive,
+ long maxValueInclusive) {
+ checkArgument(
+ (value >= minValueInclusive) && (value <= maxValueInclusive),
+ "'%s' (%s) must be within the range [%s, %s].",
+ valueName,
+ value,
+ minValueInclusive,
+ maxValueInclusive);
+ }
+
+ /**
+ * Validates that the given value is within the given range of values.
+ * @param value the value to check.
+ * @param valueName the name of the argument.
+ * @param minValueInclusive inclusive lower limit for the value.
+ * @param maxValueInclusive inclusive upper limit for the value.
+ */
+ public static void checkWithinRange(
+ double value,
+ String valueName,
+ double minValueInclusive,
+ double maxValueInclusive) {
+ checkArgument(
+ (value >= minValueInclusive) && (value <= maxValueInclusive),
+ "'%s' (%s) must be within the range [%s, %s].",
+ valueName,
+ value,
+ minValueInclusive,
+ maxValueInclusive);
+ }
+
+ /**
+ * Validates that the given path exists.
+ * @param path the path to check.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkPathExists(Path path, String argName) {
+ checkNotNull(path, argName);
+ checkArgument(Files.exists(path), "Path %s (%s) does not exist.", argName,
+ path);
+ }
+
+ /**
+ * Validates that the given path exists and is a directory.
+ * @param path the path to check.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkPathExistsAsDir(Path path, String argName) {
+ checkPathExists(path, argName);
+ checkArgument(
+ Files.isDirectory(path),
+ "Path %s (%s) must point to a directory.",
+ argName,
+ path);
+ }
+
+ /**
+ * Validates that the given path exists and is a file.
+ * @param path the path to check.
+ * @param argName the name of the argument being validated.
+ */
+ public static void checkPathExistsAsFile(Path path, String argName) {
+ checkPathExists(path, argName);
+ checkArgument(Files.isRegularFile(path),
+ "Path %s (%s) must point to a file.", argName, path);
+ }
+
+
+ /**
+ * Check state.
+ * @param expression expression which must hold.
+ * @param format format string
+ * @param args arguments for the error string
+ * @throws IllegalStateException if the state is not valid.
+ */
+ public static void checkState(boolean expression,
+ String format,
+ Object... args) {
+ if (!expression) {
+ throw new IllegalStateException(String.format(format, args));
+ }
+ }
+
+ private static void checkNotEmpty(int arraySize, String argName) {
+ checkArgument(
+ arraySize > 0,
+ "'%s' must have at least one element.",
+ argName);
+ }
+}
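
At a call site these checks are intended to read like the sketch below; the method and
argument names are invented for illustration.

    void openBlock(ByteBuffer buffer, int blockNumber, int blockSize) {
      Validate.checkNotNull(buffer, "buffer");
      Validate.checkNotNegative(blockNumber, "blockNumber");
      Validate.checkPositiveInteger(blockSize, "blockSize");
      Validate.checkWithinRange(buffer.limit(), "buffer.limit()", 1, blockSize);
      // Any failure raises IllegalArgumentException with a message naming the argument.
    }
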
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java
new file mode 100644
index 0000000000000..1b26da85d95fb
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/package-info.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Block caching for use in object store clients.
+ */
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+package org.apache.hadoop.fs.impl.prefetch;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
\ No newline at end of file
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java
new file mode 100644
index 0000000000000..557c57ea4d661
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/IOStatisticsContext.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics;
+
+import org.apache.hadoop.fs.statistics.impl.IOStatisticsContextIntegration;
+
+/**
+ * An interface for capturing thread-level IOStatistics through a
+ * per-thread context.
+ *
+ * Statistics-generating classes should obtain the aggregator in their
+ * constructor and update it from whichever threads perform the work.
+ *
+ * The {@link #snapshot()} call creates a snapshot of the statistics.
+ *
+ * The {@link #reset()} call resets the statistics in the context so
+ * that later snapshots only include data collected after the reset.
+ */
+public interface IOStatisticsContext extends IOStatisticsSource {
+
+ /**
+ * Get the IOStatisticsAggregator for the context.
+ *
+ * @return return the aggregator for the context.
+ */
+ IOStatisticsAggregator getAggregator();
+
+ /**
+ * Capture the snapshot of the context's IOStatistics.
+ *
+ * @return IOStatisticsSnapshot for the context.
+ */
+ IOStatisticsSnapshot snapshot();
+
+ /**
+ * Get a unique ID for this context, for logging
+ * purposes.
+ *
+ * @return an ID unique for all contexts in this process.
+ */
+ long getID();
+
+ /**
+ * Reset the context's IOStatistics.
+ */
+ void reset();
+
+ /**
+ * Get the IOStatisticsContext for the current thread.
+ *
+ * @return the IOStatisticsContext instance for the current thread.
+ */
+ static IOStatisticsContext getCurrentIOStatisticsContext() {
+ return IOStatisticsContextIntegration.getCurrentIOStatisticsContext();
+ }
+
+ /**
+ * Set the IOStatisticsContext for the current thread.
+ * @param statisticsContext IOStatistics context instance for the
+ * current thread. If null, the context is reset.
+ */
+ static void setThreadIOStatisticsContext(
+ IOStatisticsContext statisticsContext) {
+ IOStatisticsContextIntegration.setThreadIOStatisticsContext(
+ statisticsContext);
+ }
+
+ /**
+ * Static probe to check whether thread-level IO statistics are enabled.
+ *
+ * @return true if thread-level IO statistics are enabled.
+ */
+ static boolean enabled() {
+ return IOStatisticsContextIntegration.isIOStatisticsThreadLevelEnabled();
+ }
+
+}
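
A minimal sketch of the intended call pattern, assuming thread-level statistics are
enabled in the configuration; `in` and readFully() are hypothetical placeholders for a
filesystem stream and the IO performed on it.

    IOStatisticsContext context = IOStatisticsContext.getCurrentIOStatisticsContext();
    context.reset();                                  // discard anything this thread did earlier
    readFully(in);                                    // perform filesystem IO on this thread
    IOStatisticsSnapshot snapshot = context.snapshot();
    System.out.println("task IO statistics: " + snapshot);
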
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java
index ca755f0841914..50bbf45505cec 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StreamStatisticNames.java
@@ -47,7 +47,7 @@ public final class StreamStatisticNames {
public static final String STREAM_READ_ABORTED = "stream_aborted";
/**
- * Bytes read from an input stream in read() calls.
+ * Bytes read from an input stream in read()/readVectored() calls.
* Does not include bytes read and then discarded in seek/close etc.
* These are the bytes returned to the caller.
* Value: {@value}.
@@ -110,6 +110,34 @@ public final class StreamStatisticNames {
public static final String STREAM_READ_OPERATIONS =
"stream_read_operations";
+ /**
+ * Count of readVectored() operations in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_VECTORED_OPERATIONS =
+ "stream_read_vectored_operations";
+
+ /**
+ * Count of bytes discarded during readVectored() operation
+ * in an input stream.
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_VECTORED_READ_BYTES_DISCARDED =
+ "stream_read_vectored_read_bytes_discarded";
+
+ /**
+ * Count of incoming file ranges during readVectored() operation.
+ * Value: {@value}
+ */
+ public static final String STREAM_READ_VECTORED_INCOMING_RANGES =
+ "stream_read_vectored_incoming_ranges";
+ /**
+ * Count of combined file ranges during readVectored() operation.
+ * Value: {@value}
+ */
+ public static final String STREAM_READ_VECTORED_COMBINED_RANGES =
+ "stream_read_vectored_combined_ranges";
+
/**
* Count of incomplete read() operations in an input stream,
* that is, when the bytes returned were less than that requested.
@@ -387,6 +415,46 @@ public final class StreamStatisticNames {
public static final String BLOCKS_RELEASED
= "blocks_released";
+ /**
+ * Total number of prefetching operations executed.
+ */
+ public static final String STREAM_READ_PREFETCH_OPERATIONS
+ = "stream_read_prefetch_operations";
+
+ /**
+ * Total number of blocks in the disk cache.
+ */
+ public static final String STREAM_READ_BLOCKS_IN_FILE_CACHE
+ = "stream_read_blocks_in_cache";
+
+ /**
+ * Total number of active prefetch operations.
+ */
+ public static final String STREAM_READ_ACTIVE_PREFETCH_OPERATIONS
+ = "stream_read_active_prefetch_operations";
+
+ /**
+ * Total bytes of memory in use by this input stream.
+ */
+ public static final String STREAM_READ_ACTIVE_MEMORY_IN_USE
+ = "stream_read_active_memory_in_use";
+
+ /**
+ * count/duration of reading a remote block.
+ *
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_REMOTE_BLOCK_READ
+ = "stream_read_block_read";
+
+ /**
+ * Count/duration of acquiring a buffer and reading into it.
+ *
+ * Value: {@value}.
+ */
+ public static final String STREAM_READ_BLOCK_ACQUIRE_AND_READ
+ = "stream_read_block_acquire_read";
+
private StreamStatisticNames() {
}
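
These names are looked up like any other IOStatistics counter. A hedged sketch, where
`in` is a hypothetical input stream that acts as an IOStatisticsSource:

    IOStatistics stats = IOStatisticsSupport.retrieveIOStatistics(in);
    if (stats != null) {
      Long blocksInCache = stats.counters()
          .get(StreamStatisticNames.STREAM_READ_BLOCKS_IN_FILE_CACHE);
      System.out.println("blocks in file cache: " + blocksInCache);
    }
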
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java
new file mode 100644
index 0000000000000..b672f6639cb93
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/EmptyIOStatisticsContextImpl.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics.impl;
+
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
+import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot;
+
+/**
+ * Empty IOStatistics context which is a no-op for every operation and
+ * returns an empty snapshot when asked.
+ *
+ */
+final class EmptyIOStatisticsContextImpl implements IOStatisticsContext {
+
+ private static final IOStatisticsContext EMPTY_CONTEXT = new EmptyIOStatisticsContextImpl();
+
+ private EmptyIOStatisticsContextImpl() {
+ }
+
+ /**
+ * Create a new empty snapshot.
+ * A new one is always created for isolation.
+ *
+ * @return a statistics snapshot
+ */
+ @Override
+ public IOStatisticsSnapshot snapshot() {
+ return new IOStatisticsSnapshot();
+ }
+
+ @Override
+ public IOStatisticsAggregator getAggregator() {
+ return EmptyIOStatisticsStore.getInstance();
+ }
+
+ @Override
+ public IOStatistics getIOStatistics() {
+ return EmptyIOStatistics.getInstance();
+ }
+
+ @Override
+ public void reset() {}
+
+ /**
+ * The ID is always 0.
+ * As the real context implementation counter starts at 1,
+ * we are guaranteed to have unique IDs even between them and
+ * the empty context.
+ * @return 0
+ */
+ @Override
+ public long getID() {
+ return 0;
+ }
+
+ /**
+ * Get the single instance.
+ * @return an instance.
+ */
+ static IOStatisticsContext getInstance() {
+ return EMPTY_CONTEXT;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java
new file mode 100644
index 0000000000000..97a85281c4fb8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextImpl.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics.impl;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
+import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot;
+
+/**
+ * Implementation of {@link IOStatisticsContext}.
+ *
+ * A context for per-thread IOStatistics collection: it captures each
+ * worker thread's work in FS streams and stores it in the form of an
+ * IOStatisticsSnapshot.
+ *
+ * Because IOStatisticsSnapshot is Serializable, a snapshot taken for the
+ * current thread can be used to move the IOStatistics data between
+ * applications.
+ */
+public final class IOStatisticsContextImpl implements IOStatisticsContext {
+ private static final Logger LOG =
+ LoggerFactory.getLogger(IOStatisticsContextImpl.class);
+
+ /**
+ * Thread ID.
+ */
+ private final long threadId;
+
+ /**
+ * Unique ID.
+ */
+ private final long id;
+
+ /**
+ * IOStatistics to aggregate.
+ */
+ private final IOStatisticsSnapshot ioStatistics = new IOStatisticsSnapshot();
+
+ /**
+ * Constructor.
+ * @param threadId thread ID
+ * @param id instance ID.
+ */
+ public IOStatisticsContextImpl(final long threadId, final long id) {
+ this.threadId = threadId;
+ this.id = id;
+ }
+
+ @Override
+ public String toString() {
+ return "IOStatisticsContextImpl{" +
+ "id=" + id +
+ ", threadId=" + threadId +
+ ", ioStatistics=" + ioStatistics +
+ '}';
+ }
+
+ /**
+ * Get the IOStatisticsAggregator of the context.
+ * @return the instance of IOStatisticsAggregator for this context.
+ */
+ @Override
+ public IOStatisticsAggregator getAggregator() {
+ return ioStatistics;
+ }
+
+ /**
+ * Returns a snapshot of the current thread's IOStatistics.
+ *
+ * @return IOStatisticsSnapshot of the context.
+ */
+ @Override
+ public IOStatisticsSnapshot snapshot() {
+ LOG.debug("Taking snapshot of IOStatisticsContext id {}", id);
+ return new IOStatisticsSnapshot(ioStatistics);
+ }
+
+ /**
+ * Reset the context's IOStatistics.
+ */
+ @Override
+ public void reset() {
+ LOG.debug("clearing IOStatisticsContext id {}", id);
+ ioStatistics.clear();
+ }
+
+ @Override
+ public IOStatistics getIOStatistics() {
+ return ioStatistics;
+ }
+
+ /**
+ * ID of this context.
+ * @return ID.
+ */
+ @Override
+ public long getID() {
+ return id;
+ }
+
+ /**
+ * Get the thread ID.
+ * @return thread ID.
+ */
+ public long getThreadID() {
+ return threadId;
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java
new file mode 100644
index 0000000000000..2a394e6a1cdf1
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/IOStatisticsContextIntegration.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.statistics.impl;
+
+import java.lang.ref.WeakReference;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.impl.WeakReferenceThreadMap;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
+
+import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED;
+import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT;
+
+/**
+ * A utility class for IOStatisticsContext which creates and returns the
+ * currently active context. Static methods in this class let callers
+ * obtain the current context and start aggregating IOStatistics into it.
+ *
+ * A static initializer works out whether the feature to collect
+ * thread-level IOStatistics is enabled and selects the corresponding
+ * implementation class.
+ *
+ * A weak-reference thread map is used to keep track of the different
+ * contexts, so that the references are cleaned up at GC time and
+ * long-lived memory leaks are avoided.
+ */
+public final class IOStatisticsContextIntegration {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(IOStatisticsContextIntegration.class);
+
+ /**
+ * Is thread-level IO Statistics enabled?
+ */
+ private static boolean isThreadIOStatsEnabled;
+
+ /**
+ * ID for next instance to create.
+ */
+ public static final AtomicLong INSTANCE_ID = new AtomicLong(1);
+
+ /**
+ * Active IOStatistics Context containing different worker thread's
+ * statistics. Weak Reference so that it gets cleaned up during GC and we
+ * avoid any memory leak issues due to long lived references.
+ */
+ private static final WeakReferenceThreadMap<IOStatisticsContext>
+ ACTIVE_IOSTATS_CONTEXT =
+ new WeakReferenceThreadMap<>(
+ IOStatisticsContextIntegration::createNewInstance,
+ IOStatisticsContextIntegration::referenceLostContext
+ );
+
+ static {
+ // Work out if the current context has thread level IOStatistics enabled.
+ final Configuration configuration = new Configuration();
+ isThreadIOStatsEnabled =
+ configuration.getBoolean(IOSTATISTICS_THREAD_LEVEL_ENABLED,
+ IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT);
+ }
+
+ /**
+ * Static probe to check whether thread-level IO statistics are enabled.
+ *
+ * @return true if thread-level IO statistics are enabled.
+ */
+ public static boolean isIOStatisticsThreadLevelEnabled() {
+ return isThreadIOStatsEnabled;
+ }
+
+ /**
+ * Private constructor for a utility class to be used in IOStatisticsContext.
+ */
+ private IOStatisticsContextIntegration() {}
+
+ /**
+ * Create a new IOStatisticsContext instance for the given thread.
+ * @param key Thread ID that represents which thread the context belongs to.
+ * @return an instance of IOStatisticsContext.
+ */
+ private static IOStatisticsContext createNewInstance(Long key) {
+ return new IOStatisticsContextImpl(key, INSTANCE_ID.getAndIncrement());
+ }
+
+ /**
+ * Callback invoked when the reference to a thread's IOStatisticsContext is lost.
+ * @param key thread ID.
+ */
+ private static void referenceLostContext(Long key) {
+ LOG.debug("Reference lost for threadID for the context: {}", key);
+ }
+
+ /**
+ * Get the current thread's IOStatisticsContext instance. If no instance is
+ * present for this thread ID, create one using the factory.
+ * @return instance of IOStatisticsContext.
+ */
+ public static IOStatisticsContext getCurrentIOStatisticsContext() {
+ return isThreadIOStatsEnabled
+ ? ACTIVE_IOSTATS_CONTEXT.getForCurrentThread()
+ : EmptyIOStatisticsContextImpl.getInstance();
+ }
+
+ /**
+ * Set the IOStatisticsContext for the current thread.
+ * @param statisticsContext IOStatistics context instance for the
+ * current thread. If null, the context is reset.
+ */
+ public static void setThreadIOStatisticsContext(
+ IOStatisticsContext statisticsContext) {
+ if (isThreadIOStatsEnabled) {
+ if (statisticsContext == null) {
+ ACTIVE_IOSTATS_CONTEXT.removeForCurrentThread();
+ }
+ if (ACTIVE_IOSTATS_CONTEXT.getForCurrentThread() != statisticsContext) {
+ ACTIVE_IOSTATS_CONTEXT.setForCurrentThread(statisticsContext);
+ }
+ }
+ }
+
+ /**
+ * Get thread ID specific IOStatistics values if
+ * statistics are enabled and the thread ID is in the map.
+ * @param testThreadId thread ID.
+ * @return IOStatisticsContext if found in the map.
+ */
+ @VisibleForTesting
+ public static IOStatisticsContext getThreadSpecificIOStatisticsContext(long testThreadId) {
+ LOG.debug("IOStatsContext thread ID required: {}", testThreadId);
+
+ if (!isThreadIOStatsEnabled) {
+ return null;
+ }
+ // lookup the weakRef IOStatisticsContext for the thread ID in the
+ // ThreadMap.
+ WeakReference<IOStatisticsContext> ioStatisticsSnapshotWeakReference =
+ ACTIVE_IOSTATS_CONTEXT.lookup(testThreadId);
+ if (ioStatisticsSnapshotWeakReference != null) {
+ return ioStatisticsSnapshotWeakReference.get();
+ }
+ return null;
+ }
+
+ /**
+ * A test-only method to enable thread-level IOStatisticsContext even if it
+ * is disabled in the configuration.
+ */
+ @VisibleForTesting
+ public static void enableIOStatisticsContext() {
+ if (!isThreadIOStatsEnabled) {
+ LOG.info("Enabling Thread IOStatistics..");
+ isThreadIOStatsEnabled = true;
+ }
+ }
+}
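
The usual reason to call setThreadIOStatisticsContext() is to propagate a caller's context
into a pooled worker thread. A hedged sketch, where `executor` and `task` are illustrative
placeholders:

    IOStatisticsContext callerContext =
        IOStatisticsContextIntegration.getCurrentIOStatisticsContext();
    executor.submit(() -> {
      // Adopt the submitting thread's context so the worker's IO is aggregated with it.
      IOStatisticsContextIntegration.setThreadIOStatisticsContext(callerContext);
      try {
        task.run();
      } finally {
        // Detach so this pooled thread stops aggregating into the caller's context.
        IOStatisticsContextIntegration.setThreadIOStatisticsContext(null);
      }
    });
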
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
index 2928f88598207..178f761191b1e 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java
@@ -97,7 +97,7 @@
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.server.ServerConnector;
import org.eclipse.jetty.server.SslConnectionFactory;
-import org.eclipse.jetty.server.handler.AllowSymLinkAliasChecker;
+import org.eclipse.jetty.server.SymlinkAllowedResourceAliasChecker;
import org.eclipse.jetty.server.handler.ContextHandlerCollection;
import org.eclipse.jetty.server.handler.HandlerCollection;
import org.eclipse.jetty.server.handler.RequestLogHandler;
@@ -144,7 +144,7 @@ public final class HttpServer2 implements FilterContainer {
public static final String HTTP_SOCKET_BACKLOG_SIZE_KEY =
"hadoop.http.socket.backlog.size";
- public static final int HTTP_SOCKET_BACKLOG_SIZE_DEFAULT = 128;
+ public static final int HTTP_SOCKET_BACKLOG_SIZE_DEFAULT = 500;
public static final String HTTP_MAX_THREADS_KEY = "hadoop.http.max.threads";
public static final String HTTP_ACCEPTOR_COUNT_KEY =
"hadoop.http.acceptor.count";
@@ -939,7 +939,7 @@ protected void addDefaultApps(ContextHandlerCollection parent,
handler.setHttpOnly(true);
handler.getSessionCookieConfig().setSecure(true);
logContext.setSessionHandler(handler);
- logContext.addAliasCheck(new AllowSymLinkAliasChecker());
+ logContext.addAliasCheck(new SymlinkAllowedResourceAliasChecker(logContext));
setContextAttributes(logContext, conf);
addNoCacheFilter(logContext);
defaultContexts.put(logContext, true);
@@ -958,7 +958,7 @@ protected void addDefaultApps(ContextHandlerCollection parent,
handler.setHttpOnly(true);
handler.getSessionCookieConfig().setSecure(true);
staticContext.setSessionHandler(handler);
- staticContext.addAliasCheck(new AllowSymLinkAliasChecker());
+ staticContext.addAliasCheck(new SymlinkAllowedResourceAliasChecker(staticContext));
setContextAttributes(staticContext, conf);
defaultContexts.put(staticContext, true);
}
@@ -1967,4 +1967,8 @@ HttpServer2Metrics getMetrics() {
return metrics;
}
+ @VisibleForTesting
+ List<ServerConnector> getListeners() {
+ return listeners;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java
new file mode 100644
index 0000000000000..993d2678d2a10
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/AlreadyClosedException.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io.compress;
+
+import java.io.IOException;
+
+/**
+ * An exception class for when a closed compressor/decompressor is being used.
+ * {@link org.apache.hadoop.io.compress.Compressor}
+ * {@link org.apache.hadoop.io.compress.Decompressor}
+ */
+public class AlreadyClosedException extends IOException {
+
+ public AlreadyClosedException(String message) {
+ super(message);
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
index 7640f7ed7a6f7..1564ae9085520 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java
@@ -24,6 +24,7 @@
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
@@ -255,10 +256,7 @@ public BZip2CompressionOutputStream(OutputStream out)
private void writeStreamHeader() throws IOException {
if (super.out != null) {
- // The compressed bzip2 stream should start with the
- // identifying characters BZ. Caller of CBZip2OutputStream
- // i.e. this class must write these characters.
- out.write(HEADER.getBytes(StandardCharsets.UTF_8));
+ writeHeader(out);
}
}
@@ -547,4 +545,11 @@ private void updatePos(boolean shouldAddOn) {
}// end of BZip2CompressionInputStream
+ @VisibleForTesting
+ public static void writeHeader(OutputStream out) throws IOException {
+ // The compressed bzip2 stream should start with the
+ // identifying characters BZ. Caller of CBZip2OutputStream
+ // i.e. this class must write these characters.
+ out.write(HEADER.getBytes(StandardCharsets.UTF_8));
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java
index 1f095c6c6736e..5b1826f9e30a8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java
@@ -205,6 +205,7 @@ public static void returnCompressor(Compressor compressor) {
}
// if the compressor can't be reused, don't pool it.
if (compressor.getClass().isAnnotationPresent(DoNotPool.class)) {
+ compressor.end();
return;
}
compressor.reset();
@@ -225,6 +226,7 @@ public static void returnDecompressor(Decompressor decompressor) {
}
// if the decompressor can't be reused, don't pool it.
if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
+ decompressor.end();
return;
}
decompressor.reset();
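
With this change a codec marked @DoNotPool is released on return rather than silently
dropped, and (per the gzip changes below) any further use of that instance fails fast. A
hedged sketch, where `codec` and `conf` are assumed to already exist:

    Compressor compressor = CodecPool.getCompressor(codec, conf);
    try {
      // ... use the compressor ...
    } finally {
      CodecPool.returnCompressor(compressor);   // now calls end() on @DoNotPool compressors
    }
    // A later compressor.compress(buf, 0, buf.length) would throw AlreadyClosedException.
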
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java
index 187fe481588c8..61e88d80d8ce4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2InputStream.java
@@ -27,6 +27,7 @@
import java.io.InputStream;
import java.io.IOException;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.io.compress.SplittableCompressionCodec.READ_MODE;
@@ -312,13 +313,24 @@ private CBZip2InputStream(final InputStream in, READ_MODE readMode, boolean skip
}
} else if (readMode == READ_MODE.BYBLOCK) {
this.currentState = STATE.NO_PROCESS_STATE;
- skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER,DELIMITER_BIT_LENGTH);
+ skipResult = skipToNextBlockMarker();
if(!skipDecompression){
changeStateToProcessABlock();
}
}
}
+ /**
+ * Skips bytes in the stream until the start marker of a block is reached
+ * or end of stream is reached. Used for testing purposes to identify the
+ * start offsets of blocks.
+ */
+ @VisibleForTesting
+ boolean skipToNextBlockMarker() throws IOException {
+ return skipToNextMarker(
+ CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
+ }
+
/**
* Returns the number of bytes between the current stream position
* and the immediate next BZip2 block marker.
@@ -428,7 +440,7 @@ public int read(final byte[] dest, final int offs, final int len)
//report 'end of block' or 'end of stream'
result = b;
- skipResult = this.skipToNextMarker(CBZip2InputStream.BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
+ skipResult = skipToNextBlockMarker();
changeStateToProcessABlock();
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java
index 39c3638b0f497..50bdddb8136fc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/bzip2/CBZip2OutputStream.java
@@ -27,6 +27,7 @@
import java.io.OutputStream;
import java.io.IOException;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.io.IOUtils;
/**
@@ -781,8 +782,7 @@ private void initBlock() {
inUse[i] = false;
}
- /* 20 is just a paranoia constant */
- this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20;
+ this.allowableBlockSize = getAllowableBlockSize(this.blockSize100k);
}
private void endBlock() throws IOException {
@@ -2093,4 +2093,9 @@ private static final class Data extends Object {
}
+ @VisibleForTesting
+ static int getAllowableBlockSize(int blockSize100k) {
+ /* 20 is just a paranoia constant */
+ return (blockSize100k * BZip2Constants.baseBlockSize) - 20;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipCompressor.java
index fcb431dce86ca..d44413cc30912 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipCompressor.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipCompressor.java
@@ -24,6 +24,7 @@
import java.util.zip.GZIPOutputStream;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.compress.AlreadyClosedException;
import org.apache.hadoop.io.compress.Compressor;
import org.apache.hadoop.io.compress.DoNotPool;
import org.apache.hadoop.util.DataChecksum;
@@ -83,6 +84,10 @@ public int compress(byte[] b, int off, int len) throws IOException {
throw new IOException("compress called on finished compressor");
}
+ if (state == BuiltInGzipDecompressor.GzipStateLabel.ENDED) {
+ throw new AlreadyClosedException("compress called on closed compressor");
+ }
+
int compressedBytesWritten = 0;
// If we are not within uncompressed data yet, output the header.
@@ -139,6 +144,8 @@ public long getBytesWritten() {
@Override
public void end() {
deflater.end();
+
+ state = BuiltInGzipDecompressor.GzipStateLabel.ENDED;
}
@Override
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java
index 47c21b4e3ea98..d47864a71f481 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInGzipDecompressor.java
@@ -23,6 +23,7 @@
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
+import org.apache.hadoop.io.compress.AlreadyClosedException;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.io.compress.DoNotPool;
import org.apache.hadoop.util.DataChecksum;
@@ -109,7 +110,11 @@ public enum GzipStateLabel {
* Immediately after the trailer (and potentially prior to the next gzip
* member/substream header), without reset() having been called.
*/
- FINISHED;
+ FINISHED,
+ /**
+ * Immediately after end() has been called.
+ */
+ ENDED;
}
/**
@@ -186,6 +191,10 @@ public synchronized int decompress(byte[] b, int off, int len)
throws IOException {
int numAvailBytes = 0;
+ if (state == GzipStateLabel.ENDED) {
+ throw new AlreadyClosedException("decompress called on closed decompressor");
+ }
+
if (state != GzipStateLabel.DEFLATE_STREAM) {
executeHeaderState();
@@ -476,6 +485,8 @@ public synchronized void reset() {
@Override
public synchronized void end() {
inflater.end();
+
+ state = GzipStateLabel.ENDED;
}
/**
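A hedged fragment (inside a method that may throw IOException) showing the failure mode the new ENDED state turns into an explicit error: reusing a decompressor after end(), for example after it has been returned to a pool, now surfaces as AlreadyClosedException instead of an obscure Inflater failure. The buffer size is arbitrary; the compressor change earlier in this patch behaves symmetrically for compress() after end().

    BuiltInGzipDecompressor decompressor = new BuiltInGzipDecompressor();
    decompressor.end();                        // releases the underlying Inflater
    try {
      decompressor.decompress(new byte[64], 0, 64);
    } catch (AlreadyClosedException expected) {
      // fail-fast: the decompressor must be re-created, not reused, after end()
    }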
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
index 3960b189665f6..9707ee388e1d2 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java
@@ -46,6 +46,10 @@ public class RetryInvocationHandler<T> implements RpcInvocationHandler {
public static final Logger LOG = LoggerFactory.getLogger(
RetryInvocationHandler.class);
+ @VisibleForTesting
+ public static final ThreadLocal<Boolean> SET_CALL_ID_FOR_TEST =
+ ThreadLocal.withInitial(() -> true);
+
static class Call {
private final Method method;
private final Object[] args;
@@ -159,7 +163,7 @@ CallReturn invoke() throws Throwable {
}
Object invokeMethod() throws Throwable {
- if (isRpc) {
+ if (isRpc && SET_CALL_ID_FOR_TEST.get()) {
Client.setCallIdAndRetryCount(callId, counters.retries,
retryInvocationHandler.asyncCallHandler);
}
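A hedged test-side fragment of what the new thread-local enables: temporarily suppressing call-id stamping for RPCs issued from the current thread. The proxy object and its method are placeholders, not part of the patch:

    // Placeholder proxy/method; only the thread-local usage is the point here.
    RetryInvocationHandler.SET_CALL_ID_FOR_TEST.set(false);
    try {
      proxy.refreshState();            // invoked without Client.setCallIdAndRetryCount()
    } finally {
      RetryInvocationHandler.SET_CALL_ID_FOR_TEST.set(true);
    }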
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java
index d7693f868eb30..e14459123016b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java
@@ -181,15 +181,20 @@ public static final RetryPolicy retryByRemoteException(
}
/**
- * A retry policy for exceptions other than RemoteException.
+ * <p>
+ * A retry policy where RemoteException and SaslException are not retried, other individual
+ * exception types can have RetryPolicy overrides, and any other exception type without an
+ * override is not retried.
+ * </p>
+ *
* @param defaultPolicy defaultPolicy.
* @param exceptionToPolicyMap exceptionToPolicyMap.
* @return RetryPolicy.
*/
- public static final RetryPolicy retryOtherThanRemoteException(
+ public static final RetryPolicy retryOtherThanRemoteAndSaslException(
RetryPolicy defaultPolicy,
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap) {
- return new OtherThanRemoteExceptionDependentRetry(defaultPolicy,
+ return new OtherThanRemoteAndSaslExceptionDependentRetry(defaultPolicy,
exceptionToPolicyMap);
}
@@ -589,12 +594,12 @@ public RetryAction shouldRetry(Exception e, int retries, int failovers,
}
}
- static class OtherThanRemoteExceptionDependentRetry implements RetryPolicy {
+ static class OtherThanRemoteAndSaslExceptionDependentRetry implements RetryPolicy {
private RetryPolicy defaultPolicy;
private Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap;
- public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
+ OtherThanRemoteAndSaslExceptionDependentRetry(RetryPolicy defaultPolicy,
Map<Class<? extends Exception>,
RetryPolicy> exceptionToPolicyMap) {
this.defaultPolicy = defaultPolicy;
@@ -605,10 +610,8 @@ public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
public RetryAction shouldRetry(Exception e, int retries, int failovers,
boolean isIdempotentOrAtMostOnce) throws Exception {
RetryPolicy policy = null;
- // ignore Remote Exception
- if (e instanceof RemoteException) {
- // do nothing
- } else {
+ // ignore RemoteException and SaslException
+ if (!(e instanceof RemoteException || isSaslFailure(e))) {
policy = exceptionToPolicyMap.get(e.getClass());
}
if (policy == null) {
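A brief fragment of how a caller might assemble the renamed policy; the ConnectException override and retry counts are illustrative only (imports assumed: java.net.ConnectException, java.util.HashMap, java.util.concurrent.TimeUnit):

    // RemoteException and SaslException bypass the override map and hit the default policy;
    // ConnectException gets a bounded retry; anything else also falls back to the default
    // (here TRY_ONCE_THEN_FAIL, i.e. not retried).
    Map<Class<? extends Exception>, RetryPolicy> overrides = new HashMap<>();
    overrides.put(ConnectException.class,
        RetryPolicies.retryUpToMaximumCountWithFixedSleep(3, 1, TimeUnit.SECONDS));
    RetryPolicy policy = RetryPolicies.retryOtherThanRemoteAndSaslException(
        RetryPolicies.TRY_ONCE_THEN_FAIL, overrides);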
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java
index 3d309235fe891..8d43fd74a843c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/AlignmentContext.java
@@ -46,7 +46,7 @@ public interface AlignmentContext {
void updateResponseState(RpcResponseHeaderProto.Builder header);
/**
- * This is the intended client method call to implement to recieve state info
+ * This is the intended client method call to implement to receive state info
* during RPC response processing.
*
* @param header The RPC response header.
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
index dbd9184a2b91e..98d7e82c70e0a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallerContext.java
@@ -47,6 +47,8 @@ public final class CallerContext {
// field names
public static final String CLIENT_IP_STR = "clientIp";
public static final String CLIENT_PORT_STR = "clientPort";
+ public static final String CLIENT_ID_STR = "clientId";
+ public static final String CLIENT_CALL_ID_STR = "clientCallId";
/** The caller context.
*
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
index 2fe8aca85ed9a..20fc9efe57e0a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java
@@ -61,6 +61,7 @@
import javax.net.SocketFactory;
import javax.security.sasl.Sasl;
+import javax.security.sasl.SaslException;
import java.io.*;
import java.net.*;
import java.nio.ByteBuffer;
@@ -418,7 +419,7 @@ public synchronized Writable getRpcResponse() {
* socket: responses may be delivered out of order. */
private class Connection extends Thread {
private InetSocketAddress server; // server ip:port
- private final ConnectionId remoteId; // connection id
+ private final ConnectionId remoteId; // connection id
private AuthMethod authMethod; // authentication method
private AuthProtocol authProtocol;
private int serviceClass;
@@ -644,6 +645,9 @@ private synchronized boolean updateAddress() throws IOException {
LOG.warn("Address change detected. Old: " + server.toString() +
" New: " + currentAddr.toString());
server = currentAddr;
+ // Update the remote address so that reconnections are with the updated address.
+ // This avoids thrashing.
+ remoteId.setAddress(currentAddr);
UserGroupInformation ticket = remoteId.getTicket();
this.setName("IPC Client (" + socketFactory.hashCode()
+ ") connection to " + server.toString() + " from "
@@ -1620,7 +1624,8 @@ private Writable getRpcResponse(final Call call, final Connection connection,
}
if (call.error != null) {
- if (call.error instanceof RemoteException) {
+ if (call.error instanceof RemoteException ||
+ call.error instanceof SaslException) {
call.error.fillInStackTrace();
throw call.error;
} else { // local exception
@@ -1698,9 +1703,9 @@ private Connection getConnection(ConnectionId remoteId,
@InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"})
@InterfaceStability.Evolving
public static class ConnectionId {
- InetSocketAddress address;
- UserGroupInformation ticket;
- final Class<?> protocol;
+ private InetSocketAddress address;
+ private final UserGroupInformation ticket;
+ private final Class<?> protocol;
private static final int PRIME = 16777619;
private final int rpcTimeout;
private final int maxIdleTime; //connections will be culled if it was idle for
@@ -1715,8 +1720,8 @@ public static class ConnectionId {
private final int pingInterval; // how often sends ping to the server in msecs
private String saslQop; // here for testing
private final Configuration conf; // used to get the expected kerberos principal name
-
- ConnectionId(InetSocketAddress address, Class<?> protocol,
+
+ public ConnectionId(InetSocketAddress address, Class<?> protocol,
UserGroupInformation ticket, int rpcTimeout,
RetryPolicy connectionRetryPolicy, Configuration conf) {
this.protocol = protocol;
@@ -1751,7 +1756,28 @@ public static class ConnectionId {
InetSocketAddress getAddress() {
return address;
}
-
+
+ /**
+ * This is used to update the remote address when an address change is detected. This method
+ * ensures that the {@link #hashCode()} won't change.
+ *
+ * @param address the updated address
+ * @throws IllegalArgumentException if the hostname or port doesn't match
+ * @see Connection#updateAddress()
+ */
+ void setAddress(InetSocketAddress address) {
+ if (!Objects.equals(this.address.getHostName(), address.getHostName())) {
+ throw new IllegalArgumentException("Hostname must match: " + this.address + " vs "
+ + address);
+ }
+ if (this.address.getPort() != address.getPort()) {
+ throw new IllegalArgumentException("Port must match: " + this.address + " vs " + address);
+ }
+
+ this.address = address;
+ }
+
+
Class<?> getProtocol() {
return protocol;
}
@@ -1760,7 +1786,7 @@ UserGroupInformation getTicket() {
return ticket;
}
- private int getRpcTimeout() {
+ int getRpcTimeout() {
return rpcTimeout;
}
@@ -1794,6 +1820,10 @@ boolean getDoPing() {
int getPingInterval() {
return pingInterval;
}
+
+ RetryPolicy getRetryPolicy() {
+ return connectionRetryPolicy;
+ }
@VisibleForTesting
String getSaslQop() {
@@ -1858,7 +1888,11 @@ && isEqual(this.protocol, that.protocol)
@Override
public int hashCode() {
int result = connectionRetryPolicy.hashCode();
- result = PRIME * result + ((address == null) ? 0 : address.hashCode());
+ // We calculate based on the host name and port without the IP address, since the hashCode
+ // must be stable even if the IP address is updated.
+ result = PRIME * result + ((address == null || address.getHostName() == null) ? 0 :
+ address.getHostName().hashCode());
+ result = PRIME * result + ((address == null) ? 0 : address.getPort());
result = PRIME * result + (doPing ? 1231 : 1237);
result = PRIME * result + maxIdleTime;
result = PRIME * result + pingInterval;
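A hedged fragment (same-package access assumed, since setAddress is package-private) illustrating the invariant the new setAddress and hashCode are meant to preserve: re-resolving a hostname to a new IP keeps the ConnectionId hashing to the same bucket, while a changed host or port is rejected. The protocol class, UGI and configuration are placeholders:

    Client.ConnectionId id = new Client.ConnectionId(
        new InetSocketAddress("nn1.example.com", 8020),        // placeholder endpoint
        MyProtocol.class, ugi, 0, RetryPolicies.TRY_ONCE_THEN_FAIL, conf);
    int before = id.hashCode();
    // Same hostname and port, possibly a different resolved IP: accepted, hash unchanged.
    id.setAddress(new InetSocketAddress("nn1.example.com", 8020));
    assert before == id.hashCode();
    // new InetSocketAddress("nn1.example.com", 8021) would throw IllegalArgumentException.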
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
index e53f57b1fc9dd..df0f734d08016 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java
@@ -77,6 +77,16 @@ public static AsyncGet<Message, Exception> getAsyncReturnMessage() {
return ASYNC_RETURN_MESSAGE.get();
}
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
+ ConnectionId connId, Configuration conf, SocketFactory factory,
+ AlignmentContext alignmentContext) throws IOException {
+ final Invoker invoker = new Invoker(protocol, connId, conf, factory, alignmentContext);
+ return new ProtocolProxy<T>(protocol, (T) Proxy.newProxyInstance(
+ protocol.getClassLoader(), new Class[] {protocol}, invoker), false);
+ }
+
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
InetSocketAddress addr, UserGroupInformation ticket, Configuration conf,
SocketFactory factory, int rpcTimeout) throws IOException {
@@ -116,7 +126,7 @@ public ProtocolProxy<ProtocolMetaInfoPB> getProtocolMetaInfoProxy(
return new ProtocolProxy<ProtocolMetaInfoPB>(protocol,
(ProtocolMetaInfoPB) Proxy.newProxyInstance(protocol.getClassLoader(),
new Class[] { protocol }, new Invoker(protocol, connId, conf,
- factory)), false);
+ factory, null)), false);
}
protected static class Invoker implements RpcInvocationHandler {
@@ -137,9 +147,8 @@ protected Invoker(Class<?> protocol, InetSocketAddress addr,
throws IOException {
this(protocol, Client.ConnectionId.getConnectionId(
addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf),
- conf, factory);
+ conf, factory, alignmentContext);
this.fallbackToSimpleAuth = fallbackToSimpleAuth;
- this.alignmentContext = alignmentContext;
}
/**
@@ -148,14 +157,16 @@ protected Invoker(Class<?> protocol, InetSocketAddress addr,
* @param connId input connId.
* @param conf input Configuration.
* @param factory input factory.
+ * @param alignmentContext Alignment context
*/
protected Invoker(Class<?> protocol, Client.ConnectionId connId,
- Configuration conf, SocketFactory factory) {
+ Configuration conf, SocketFactory factory, AlignmentContext alignmentContext) {
this.remoteId = connId;
this.client = CLIENTS.getClient(conf, factory, RpcWritable.Buffer.class);
this.protocolName = RPC.getProtocolName(protocol);
this.clientProtocolVersion = RPC
.getProtocolVersion(protocol);
+ this.alignmentContext = alignmentContext;
}
private RequestHeaderProto constructRpcRequestHeader(Method method) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
index 3a8c6275820c4..bedecc8851d6a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine2.java
@@ -100,6 +100,16 @@ public <T> ProtocolProxy<T> getProxy(
rpcTimeout, connectionRetryPolicy, null, null);
}
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
+ ConnectionId connId, Configuration conf, SocketFactory factory,
+ AlignmentContext alignmentContext) throws IOException {
+ final Invoker invoker = new Invoker(protocol, connId, conf, factory, alignmentContext);
+ return new ProtocolProxy<T>(protocol, (T) Proxy.newProxyInstance(
+ protocol.getClassLoader(), new Class[] {protocol}, invoker), false);
+ }
+
@Override
@SuppressWarnings("unchecked")
public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
@@ -123,7 +133,7 @@ public ProtocolProxy<ProtocolMetaInfoPB> getProtocolMetaInfoProxy(
return new ProtocolProxy<ProtocolMetaInfoPB>(protocol,
(ProtocolMetaInfoPB) Proxy.newProxyInstance(protocol.getClassLoader(),
new Class[]{protocol}, new Invoker(protocol, connId, conf,
- factory)), false);
+ factory, null)), false);
}
protected static class Invoker implements RpcInvocationHandler {
@@ -144,9 +154,8 @@ protected Invoker(Class<?> protocol, InetSocketAddress addr,
throws IOException {
this(protocol, Client.ConnectionId.getConnectionId(
addr, protocol, ticket, rpcTimeout, connectionRetryPolicy, conf),
- conf, factory);
+ conf, factory, alignmentContext);
this.fallbackToSimpleAuth = fallbackToSimpleAuth;
- this.alignmentContext = alignmentContext;
}
/**
@@ -156,14 +165,16 @@ protected Invoker(Class<?> protocol, InetSocketAddress addr,
* @param connId input connId.
* @param conf input Configuration.
* @param factory input factory.
+ * @param alignmentContext Alignment context
*/
protected Invoker(Class<?> protocol, Client.ConnectionId connId,
- Configuration conf, SocketFactory factory) {
+ Configuration conf, SocketFactory factory, AlignmentContext alignmentContext) {
this.remoteId = connId;
this.client = CLIENTS.getClient(conf, factory, RpcWritable.Buffer.class);
this.protocolName = RPC.getProtocolName(protocol);
this.clientProtocolVersion = RPC
.getProtocolVersion(protocol);
+ this.alignmentContext = alignmentContext;
}
private RequestHeaderProto constructRpcRequestHeader(Method method) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java
index 818305b316984..fc562b525ad6a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java
@@ -541,6 +541,50 @@ public static <T> ProtocolProxy<T> getProtocolProxy(Class<T> protocol,
return getProtocolProxy(protocol, clientVersion, addr, ticket, conf,
factory, getRpcTimeout(conf), null);
}
+
+ /**
+ * Get a protocol proxy that contains a proxy connection to a remote server
+ * and a set of methods that are supported by the server.
+ *
+ * @param <T> Generics Type T
+ * @param protocol protocol class
+ * @param clientVersion client's version
+ * @param connId client connection identifier
+ * @param conf configuration
+ * @param factory socket factory
+ * @return the protocol proxy
+ * @throws IOException if the far end throws a RemoteException
+ */
+ public static <T> ProtocolProxy<T> getProtocolProxy(Class<T> protocol,
+ long clientVersion, ConnectionId connId, Configuration conf,
+ SocketFactory factory) throws IOException {
+ return getProtocolProxy(protocol, clientVersion, connId, conf,
+ factory, null);
+ }
+
+ /**
+ * Get a protocol proxy that contains a proxy connection to a remote server
+ * and a set of methods that are supported by the server.
+ *
+ * @param <T> Generics Type T
+ * @param protocol protocol class
+ * @param clientVersion client's version
+ * @param connId client connection identifier
+ * @param conf configuration
+ * @param factory socket factory
+ * @param alignmentContext StateID alignment context
+ * @return the protocol proxy
+ * @throws IOException if the far end throws a RemoteException
+ */
+ public static <T> ProtocolProxy<T> getProtocolProxy(Class<T> protocol,
+ long clientVersion, ConnectionId connId, Configuration conf,
+ SocketFactory factory, AlignmentContext alignmentContext) throws IOException {
+ if (UserGroupInformation.isSecurityEnabled()) {
+ SaslRpcServer.init(conf);
+ }
+ return getProtocolEngine(protocol, conf).getProxy(
+ protocol, clientVersion, connId, conf, factory, alignmentContext);
+ }
/**
* Construct a client-side proxy that implements the named protocol,
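A hedged fragment of how the new ConnectionId-based entry point might be used to hand an externally built connection id plus an alignment context to the proxy factory; the protocol interface, endpoint and alignmentContext variable are assumptions:

    Client.ConnectionId connId = Client.ConnectionId.getConnectionId(
        NetUtils.createSocketAddr("nn1.example.com:8020"),      // placeholder endpoint
        MyProtocolPB.class, UserGroupInformation.getCurrentUser(), 0, null, conf);
    ProtocolProxy<MyProtocolPB> proxy = RPC.getProtocolProxy(
        MyProtocolPB.class, RPC.getProtocolVersion(MyProtocolPB.class),
        connId, conf, NetUtils.getDefaultSocketFactory(conf), alignmentContext);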
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
index 3d64a84bfb46f..624cc08ac25be 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RetryCache.java
@@ -55,14 +55,14 @@ public static class CacheEntry implements LightWeightCache.Entry {
/**
* Processing state of the requests.
*/
- private static byte INPROGRESS = 0;
- private static byte SUCCESS = 1;
- private static byte FAILED = 2;
+ private static final byte INPROGRESS = 0;
+ private static final byte SUCCESS = 1;
+ private static final byte FAILED = 2;
private byte state = INPROGRESS;
// Store uuid as two long for better memory utilization
- private final long clientIdMsb; // Most signficant bytes
+ private final long clientIdMsb; // Most significant bytes
private final long clientIdLsb; // Least significant bytes
private final int callId;
@@ -140,8 +140,8 @@ public long getExpirationTime() {
@Override
public String toString() {
- return (new UUID(this.clientIdMsb, this.clientIdLsb)).toString() + ":"
- + this.callId + ":" + this.state;
+ return String.format("%s:%s:%s", new UUID(this.clientIdMsb, this.clientIdLsb),
+ this.callId, this.state);
}
}
@@ -183,7 +183,7 @@ public Object getPayload() {
private final LightWeightGSet<CacheEntry, CacheEntry> set;
private final long expirationTime;
- private String cacheName;
+ private final String cacheName;
private final ReentrantLock lock = new ReentrantLock();
@@ -195,7 +195,7 @@ public Object getPayload() {
*/
public RetryCache(String cacheName, double percentage, long expirationTime) {
int capacity = LightWeightGSet.computeCapacity(percentage, cacheName);
- capacity = capacity > MAX_CAPACITY ? capacity : MAX_CAPACITY;
+ capacity = Math.max(capacity, MAX_CAPACITY);
this.set = new LightWeightCache<CacheEntry, CacheEntry>(capacity, capacity,
expirationTime, 0);
this.expirationTime = expirationTime;
@@ -203,11 +203,11 @@ public RetryCache(String cacheName, double percentage, long expirationTime) {
this.retryCacheMetrics = RetryCacheMetrics.create(this);
}
- private static boolean skipRetryCache() {
+ private static boolean skipRetryCache(byte[] clientId, int callId) {
// Do not track non RPC invocation or RPC requests with
// invalid callId or clientId in retry cache
- return !Server.isRpcInvocation() || Server.getCallId() < 0
- || Arrays.equals(Server.getClientId(), RpcConstants.DUMMY_CLIENT_ID);
+ return !Server.isRpcInvocation() || callId < 0
+ || Arrays.equals(clientId, RpcConstants.DUMMY_CLIENT_ID);
}
public void lock() {
@@ -332,43 +332,51 @@ public void addCacheEntryWithPayload(byte[] clientId, int callId,
retryCacheMetrics.incrCacheUpdated();
}
- private static CacheEntry newEntry(long expirationTime) {
- return new CacheEntry(Server.getClientId(), Server.getCallId(),
+ private static CacheEntry newEntry(long expirationTime,
+ byte[] clientId, int callId) {
+ return new CacheEntry(clientId, callId,
System.nanoTime() + expirationTime);
}
private static CacheEntryWithPayload newEntry(Object payload,
- long expirationTime) {
- return new CacheEntryWithPayload(Server.getClientId(), Server.getCallId(),
+ long expirationTime, byte[] clientId, int callId) {
+ return new CacheEntryWithPayload(clientId, callId,
payload, System.nanoTime() + expirationTime);
}
/**
* Static method that provides null check for retryCache.
* @param cache input Cache.
+ * @param clientId client id of this request
+ * @param callId client call id of this request
* @return CacheEntry.
*/
- public static CacheEntry waitForCompletion(RetryCache cache) {
- if (skipRetryCache()) {
+ public static CacheEntry waitForCompletion(RetryCache cache,
+ byte[] clientId, int callId) {
+ if (skipRetryCache(clientId, callId)) {
return null;
}
return cache != null ? cache
- .waitForCompletion(newEntry(cache.expirationTime)) : null;
+ .waitForCompletion(newEntry(cache.expirationTime,
+ clientId, callId)) : null;
}
/**
* Static method that provides null check for retryCache.
* @param cache input cache.
* @param payload input payload.
+ * @param clientId client id of this request
+ * @param callId client call id of this request
* @return CacheEntryWithPayload.
*/
public static CacheEntryWithPayload waitForCompletion(RetryCache cache,
- Object payload) {
- if (skipRetryCache()) {
+ Object payload, byte[] clientId, int callId) {
+ if (skipRetryCache(clientId, callId)) {
return null;
}
return (CacheEntryWithPayload) (cache != null ? cache
- .waitForCompletion(newEntry(payload, cache.expirationTime)) : null);
+ .waitForCompletion(newEntry(payload, cache.expirationTime,
+ clientId, callId)) : null);
}
public static void setState(CacheEntry e, boolean success) {
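A hedged sketch of the server-side calling pattern the new signatures imply: the handler passes the client id and call id it already holds instead of RetryCache re-reading them from thread-local server state. Error handling is condensed and the surrounding operation is left abstract:

    byte[] clientId = Server.getClientId();
    int callId = Server.getCallId();
    RetryCache.CacheEntry entry =
        RetryCache.waitForCompletion(retryCache, clientId, callId);
    if (entry != null && entry.isSuccess()) {
      return;                          // duplicate of an already-completed request
    }
    boolean success = false;
    try {
      // ... perform the operation ...
      success = true;
    } finally {
      RetryCache.setState(entry, success);
    }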
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java
index afc9d035b097c..f322f6eb98abb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcEngine.java
@@ -57,6 +57,24 @@ <T> ProtocolProxy<T> getProxy(Class<T> protocol,
SocketFactory factory, int rpcTimeout,
RetryPolicy connectionRetryPolicy) throws IOException;
+ /**
+ * Construct a client-side proxy object with a ConnectionId.
+ *
+ * @param <T> Generics Type T.
+ * @param protocol input protocol.
+ * @param clientVersion input clientVersion.
+ * @param connId input ConnectionId.
+ * @param conf input Configuration.
+ * @param factory input factory.
+ * @param alignmentContext Alignment context
+ * @throws IOException raised on errors performing I/O.
+ * @return ProtocolProxy.
+ */
+ <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
+ Client.ConnectionId connId, Configuration conf, SocketFactory factory,
+ AlignmentContext alignmentContext)
+ throws IOException;
+
/**
* Construct a client-side proxy object.
*
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
index e79612f7a5a0f..17366eb9569f1 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java
@@ -287,11 +287,8 @@ public static void registerProtocolEngine(RPC.RpcKind rpcKind,
throw new IllegalArgumentException("ReRegistration of rpcKind: " +
rpcKind);
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("rpcKind=" + rpcKind +
- ", rpcRequestWrapperClass=" + rpcRequestWrapperClass +
- ", rpcInvoker=" + rpcInvoker);
- }
+ LOG.debug("rpcKind={}, rpcRequestWrapperClass={}, rpcInvoker={}.",
+ rpcKind, rpcRequestWrapperClass, rpcInvoker);
}
public Class<? extends Writable> getRpcRequestWrapper(
@@ -928,7 +925,7 @@ public static class Call implements Schedulable,
private volatile String detailedMetricsName = "";
final int callId; // the client's call id
final int retryCount; // the retry count of the call
- long timestampNanos; // time the call was received
+ private final long timestampNanos; // time the call was received
long responseTimestampNanos; // time the call was served
private AtomicInteger responseWaitCount = new AtomicInteger(1);
final RPC.RpcKind rpcKind;
@@ -940,6 +937,9 @@ public static class Call implements Schedulable,
// the priority level assigned by scheduler, 0 by default
private long clientStateId;
private boolean isCallCoordinated;
+ // Serialized RouterFederatedStateProto message to
+ // store last seen states for multiple namespaces.
+ private ByteString federatedNamespaceState;
Call() {
this(RpcConstants.INVALID_CALL_ID, RpcConstants.INVALID_RETRY_COUNT,
@@ -997,6 +997,14 @@ public ProcessingDetails getProcessingDetails() {
return processingDetails;
}
+ public void setFederatedNamespaceState(ByteString federatedNamespaceState) {
+ this.federatedNamespaceState = federatedNamespaceState;
+ }
+
+ public ByteString getFederatedNamespaceState() {
+ return this.federatedNamespaceState;
+ }
+
@Override
public String toString() {
return "Call#" + callId + " Retry#" + retryCount;
@@ -1110,6 +1118,10 @@ public void setDeferredResponse(Writable response) {
public void setDeferredError(Throwable t) {
}
+
+ public long getTimestampNanos() {
+ return timestampNanos;
+ }
}
/** A RPC extended call queued for handling. */
@@ -1191,7 +1203,7 @@ public Void run() throws Exception {
try {
value = call(
- rpcKind, connection.protocolName, rpcRequest, timestampNanos);
+ rpcKind, connection.protocolName, rpcRequest, getTimestampNanos());
} catch (Throwable e) {
populateResponseParamsOnError(e, responseParams);
}
@@ -1212,9 +1224,7 @@ public Void run() throws Exception {
deltaNanos = Time.monotonicNowNanos() - startNanos;
details.set(Timing.RESPONSE, deltaNanos, TimeUnit.NANOSECONDS);
} else {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Deferring response for callId: " + this.callId);
- }
+ LOG.debug("Deferring response for callId: {}", this.callId);
}
return null;
}
@@ -1711,9 +1721,7 @@ private void doRunLoop() {
// If there were some calls that have not been sent out for a
// long time, discard them.
//
- if(LOG.isDebugEnabled()) {
- LOG.debug("Checking for old call responses.");
- }
+ LOG.debug("Checking for old call responses.");
ArrayList<RpcCall> calls;
// get the list of channels from list of keys.
@@ -1813,9 +1821,8 @@ private boolean processResponse(LinkedList<RpcCall> responseQueue,
//
call = responseQueue.removeFirst();
SocketChannel channel = call.connection.channel;
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() + ": responding to " + call);
- }
+
+ LOG.debug("{}: responding to {}.", Thread.currentThread().getName(), call);
//
// Send as much data as we can in the non-blocking fashion
//
@@ -1832,10 +1839,8 @@ private boolean processResponse(LinkedList<RpcCall> responseQueue,
} else {
done = false; // more calls pending to be sent.
}
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() + ": responding to " + call
- + " Wrote " + numBytes + " bytes.");
- }
+ LOG.debug("{}: responding to {} Wrote {} bytes.",
+ Thread.currentThread().getName(), call, numBytes);
} else {
//
// If we were unable to write the entire response out, then
@@ -1860,10 +1865,8 @@ private boolean processResponse(LinkedList<RpcCall> responseQueue,
decPending();
}
}
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() + ": responding to " + call
- + " Wrote partial " + numBytes + " bytes.");
- }
+ LOG.debug("{}: responding to {} Wrote partial {} bytes.",
+ Thread.currentThread().getName(), call, numBytes);
}
error = false; // everything went off well
}
@@ -2209,13 +2212,11 @@ private void saslProcess(RpcSaslProto saslMessage)
if (saslServer != null && saslServer.isComplete()) {
if (LOG.isDebugEnabled()) {
- LOG.debug("SASL server context established. Negotiated QoP is "
- + saslServer.getNegotiatedProperty(Sasl.QOP));
+ LOG.debug("SASL server context established. Negotiated QoP is {}.",
+ saslServer.getNegotiatedProperty(Sasl.QOP));
}
user = getAuthorizedUgi(saslServer.getAuthorizationID());
- if (LOG.isDebugEnabled()) {
- LOG.debug("SASL server successfully authenticated client: " + user);
- }
+ LOG.debug("SASL server successfully authenticated client: {}.", user);
rpcMetrics.incrAuthenticationSuccesses();
AUDITLOG.info(AUTH_SUCCESSFUL_FOR + user + " from " + toString());
saslContextEstablished = true;
@@ -2320,10 +2321,8 @@ private RpcSaslProto processSaslToken(RpcSaslProto saslMessage)
throw new SaslException("Client did not send a token");
}
byte[] saslToken = saslMessage.getToken().toByteArray();
- if (LOG.isDebugEnabled()) {
- LOG.debug("Have read input token of size " + saslToken.length
- + " for processing by saslServer.evaluateResponse()");
- }
+ LOG.debug("Have read input token of size {} for processing by saslServer.evaluateResponse()",
+ saslToken.length);
saslToken = saslServer.evaluateResponse(saslToken);
return buildSaslResponse(
saslServer.isComplete() ? SaslState.SUCCESS : SaslState.CHALLENGE,
@@ -2338,9 +2337,8 @@ private void switchToSimple() {
private RpcSaslProto buildSaslResponse(SaslState state, byte[] replyToken) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Will send " + state + " token of size "
- + ((replyToken != null) ? replyToken.length : null)
- + " from saslServer.");
+ LOG.debug("Will send {} token of size {} from saslServer.", state,
+ ((replyToken != null) ? replyToken.length : null));
}
RpcSaslProto.Builder response = RpcSaslProto.newBuilder();
response.setState(state);
@@ -2664,10 +2662,8 @@ private void processConnectionContext(RpcWritable.Buffer buffer)
*/
private void unwrapPacketAndProcessRpcs(byte[] inBuf)
throws IOException, InterruptedException {
- if (LOG.isDebugEnabled()) {
- LOG.debug("Have read input token of size " + inBuf.length
- + " for processing by saslServer.unwrap()");
- }
+ LOG.debug("Have read input token of size {} for processing by saslServer.unwrap()",
+ inBuf.length);
inBuf = saslServer.unwrap(inBuf, 0, inBuf.length);
ReadableByteChannel ch = Channels.newChannel(new ByteArrayInputStream(
inBuf));
@@ -2729,9 +2725,7 @@ private void processOneRpc(ByteBuffer bb)
getMessage(RpcRequestHeaderProto.getDefaultInstance(), buffer);
callId = header.getCallId();
retry = header.getRetryCount();
- if (LOG.isDebugEnabled()) {
- LOG.debug(" got #" + callId);
- }
+ LOG.debug(" got #{}", callId);
checkRpcHeaders(header);
if (callId < 0) { // callIds typically used during connection setup
@@ -2746,11 +2740,8 @@ private void processOneRpc(ByteBuffer bb)
} catch (RpcServerException rse) {
// inform client of error, but do not rethrow else non-fatal
// exceptions will close connection!
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() +
- ": processOneRpc from client " + this +
- " threw exception [" + rse + "]");
- }
+ LOG.debug("{}: processOneRpc from client {} threw exception [{}]",
+ Thread.currentThread().getName(), this, rse);
// use the wrapped exception if there is one.
Throwable t = (rse.getCause() != null) ? rse.getCause() : rse;
final RpcCall call = new RpcCall(this, callId, retry);
@@ -2888,6 +2879,9 @@ private void processRpcRequest(RpcRequestHeaderProto header,
stateId = alignmentContext.receiveRequestState(
header, getMaxIdleTime());
call.setClientStateId(stateId);
+ if (header.hasRouterFederatedState()) {
+ call.setFederatedNamespaceState(header.getRouterFederatedState());
+ }
}
} catch (IOException ioe) {
throw new RpcServerException("Processing RPC request caught ", ioe);
@@ -2962,9 +2956,7 @@ private void authorizeConnection() throws RpcServerException {
ProxyUsers.authorize(user, this.getHostAddress());
}
authorize(user, protocolName, getHostInetAddress());
- if (LOG.isDebugEnabled()) {
- LOG.debug("Successfully authorized " + connectionContext);
- }
+ LOG.debug("Successfully authorized {}.", connectionContext);
rpcMetrics.incrAuthorizationSuccesses();
} catch (AuthorizationException ae) {
LOG.info("Connection from " + this
@@ -3081,7 +3073,7 @@ public Handler(int instanceNumber) {
@Override
public void run() {
- LOG.debug(Thread.currentThread().getName() + ": starting");
+ LOG.debug("{}: starting", Thread.currentThread().getName());
SERVER.set(Server.this);
while (running) {
TraceScope traceScope = null;
@@ -3115,9 +3107,7 @@ public void run() {
call = null;
continue;
}
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() + ": " + call + " for RpcKind " + call.rpcKind);
- }
+ LOG.debug("{}: {} for RpcKind {}.", Thread.currentThread().getName(), call, call.rpcKind);
CurCall.set(call);
if (call.span != null) {
traceScope = tracer.activateSpan(call.span);
@@ -3152,21 +3142,21 @@ public void run() {
IOUtils.cleanupWithLogger(LOG, traceScope);
if (call != null) {
updateMetrics(call, startTimeNanos, connDropped);
- ProcessingDetails.LOG.debug(
- "Served: [{}]{} name={} user={} details={}",
+ ProcessingDetails.LOG.debug("Served: [{}]{} name={} user={} details={}",
call, (call.isResponseDeferred() ? ", deferred" : ""),
call.getDetailedMetricsName(), call.getRemoteUser(),
call.getProcessingDetails());
}
}
}
- LOG.debug(Thread.currentThread().getName() + ": exiting");
+ LOG.debug("{}: exiting", Thread.currentThread().getName());
}
private void requeueCall(Call call)
throws IOException, InterruptedException {
try {
internalQueueCall(call, false);
+ rpcMetrics.incrRequeueCalls();
} catch (RpcServerException rse) {
call.doResponse(rse.getCause(), rse.getRpcStatusProto());
}
@@ -3389,14 +3379,13 @@ private List<AuthMethod> getAuthMethods(SecretManager<?> secretManager,
" authentication requires a secret manager");
}
} else if (secretManager != null) {
- LOG.debug(AuthenticationMethod.TOKEN +
- " authentication enabled for secret manager");
+ LOG.debug("{} authentication enabled for secret manager", AuthenticationMethod.TOKEN);
// most preferred, go to the front of the line!
authMethods.add(AuthenticationMethod.TOKEN.getAuthMethod());
}
authMethods.add(confAuthenticationMethod.getAuthMethod());
- LOG.debug("Server accepts auth methods:" + authMethods);
+ LOG.debug("Server accepts auth methods:{}", authMethods);
return authMethods;
}
@@ -3556,9 +3545,7 @@ private void wrapWithSasl(RpcCall call) throws IOException {
synchronized (call.connection.saslServer) {
token = call.connection.saslServer.wrap(token, 0, token.length);
}
- if (LOG.isDebugEnabled())
- LOG.debug("Adding saslServer wrapped token of size " + token.length
- + " as call response.");
+ LOG.debug("Adding saslServer wrapped token of size {} as call response.", token.length);
// rebuild with sasl header and payload
RpcResponseHeaderProto saslHeader = RpcResponseHeaderProto.newBuilder()
.setCallId(AuthProtocol.SASL.callId)
@@ -4004,11 +3991,8 @@ Connection register(SocketChannel channel, int ingressPort,
Connection connection = new Connection(channel, Time.now(),
ingressPort, isOnAuxiliaryPort);
add(connection);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Server connection from " + connection +
- "; # active connections: " + size() +
- "; # queued calls: " + callQueue.size());
- }
+ LOG.debug("Server connection from {}; # active connections: {}; # queued calls: {}.",
+ connection, size(), callQueue.size());
return connection;
}
@@ -4016,9 +4000,8 @@ boolean close(Connection connection) {
boolean exists = remove(connection);
if (exists) {
if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName() +
- ": disconnecting client " + connection +
- ". Number of active connections: "+ size());
+ LOG.debug("{}: disconnecting client {}. Number of active connections: {}.",
+ Thread.currentThread().getName(), connection, size());
}
// only close if actually removed to avoid double-closing due
// to possible races
@@ -4080,9 +4063,7 @@ public void run() {
if (!running) {
return;
}
- if (LOG.isDebugEnabled()) {
- LOG.debug(Thread.currentThread().getName()+": task running");
- }
+ LOG.debug("{}: task running", Thread.currentThread().getName());
try {
closeIdle(false);
} finally {
@@ -4131,4 +4112,18 @@ public synchronized void run() {
}
}
+ @VisibleForTesting
+ CallQueueManager<Call> getCallQueue() {
+ return callQueue;
+ }
+
+ @VisibleForTesting
+ void setCallQueue(CallQueueManager<Call> callQueue) {
+ this.callQueue = callQueue;
+ }
+
+ @VisibleForTesting
+ void setRpcRequestClass(Class<? extends Writable> rpcRequestClass) {
+ this.rpcRequestClass = rpcRequestClass;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
index 21181f860d98a..d92bcea5d2eff 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java
@@ -306,6 +306,29 @@ public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
rpcTimeout, connectionRetryPolicy, null, null);
}
+ /**
+ * Construct a client-side proxy object with a ConnectionId.
+ *
+ * @param <T> Generics Type T.
+ * @param protocol input protocol.
+ * @param clientVersion input clientVersion.
+ * @param connId input ConnectionId.
+ * @param conf input Configuration.
+ * @param factory input factory.
+ * @param alignmentContext Alignment context
+ * @throws IOException raised on errors performing I/O.
+ * @return ProtocolProxy.
+ */
+ @Override
+ public <T> ProtocolProxy<T> getProxy(Class<T> protocol, long clientVersion,
+ Client.ConnectionId connId, Configuration conf, SocketFactory factory,
+ AlignmentContext alignmentContext)
+ throws IOException {
+ return getProxy(protocol, clientVersion, connId.getAddress(),
+ connId.getTicket(), conf, factory, connId.getRpcTimeout(),
+ connId.getRetryPolicy(), null, alignmentContext);
+ }
+
/**
* Construct a client-side proxy object that implements the named protocol,
* talking to a server at the named address.
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java
index bf21e3865fa8a..282eca3cf8373 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/metrics/RpcMetrics.java
@@ -128,6 +128,8 @@ public static RpcMetrics create(Server server, Configuration conf) {
MutableCounterLong rpcClientBackoff;
@Metric("Number of Slow RPC calls")
MutableCounterLong rpcSlowCalls;
+ @Metric("Number of requeue calls")
+ MutableCounterLong rpcRequeueCalls;
@Metric("Number of open connections") public int numOpenConnections() {
return server.getNumOpenConnections();
@@ -304,6 +306,13 @@ public void incrSlowRpc() {
rpcSlowCalls.incr();
}
+ /**
+ * Increments the Requeue Calls counter.
+ */
+ public void incrRequeueCalls() {
+ rpcRequeueCalls.incr();
+ }
+
/**
* Returns a MutableRate Counter.
* @return Mutable Rate
@@ -344,6 +353,15 @@ public long getRpcSlowCalls() {
return rpcSlowCalls.value();
}
+ /**
+ * Returns the number of requeue calls.
+ * @return long
+ */
+ @VisibleForTesting
+ public long getRpcRequeueCalls() {
+ return rpcRequeueCalls.value();
+ }
+
public MutableRate getDeferredRpcProcessingTime() {
return deferredRpcProcessingTime;
}
@@ -364,4 +382,9 @@ public double getDeferredRpcProcessingStdDev() {
public MetricsTag getTag(String tagName) {
return registry.getTag(tagName);
}
+
+ @VisibleForTesting
+ public MutableCounterLong getRpcAuthorizationSuccesses() {
+ return rpcAuthorizationSuccesses;
+ }
}
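A short sketch, assuming a test that already holds the Server under test, of how the new requeue counter is expected to be observed; the trigger itself is left abstract:

    RpcMetrics metrics = server.getRpcMetrics();
    long before = metrics.getRpcRequeueCalls();
    // ... drive a code path that makes a handler requeue a call ...
    assertEquals(before + 1, metrics.getRpcRequeueCalls());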
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java
index 9ffceaaa0ddda..b11f775a73db3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsRecordImpl.java
@@ -22,12 +22,14 @@
import static org.apache.hadoop.util.Preconditions.*;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsTag;
import static org.apache.hadoop.metrics2.util.Contracts.*;
-class MetricsRecordImpl extends AbstractMetricsRecord {
+@VisibleForTesting
+public class MetricsRecordImpl extends AbstractMetricsRecord {
protected static final String DEFAULT_CONTEXT = "default";
private final long timestamp;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
index 4c5f0a844aaab..60b33a84b519b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableRatesWithAggregation.java
@@ -72,7 +72,7 @@ public void init(Class<?> protocol) {
return;
}
protocolCache.add(protocol);
- for (Method method : protocol.getDeclaredMethods()) {
+ for (Method method : protocol.getMethods()) {
String name = method.getName();
LOG.debug(name);
addMetricIfNotExists(name);
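The switch from getDeclaredMethods() to getMethods() matters because protocol interfaces commonly extend other interfaces; a tiny illustration with made-up interfaces:

    // Not Hadoop types, illustration only.
    interface BaseProtocol { void renewLease(); }
    interface SubProtocol extends BaseProtocol { void create(); }
    // SubProtocol.class.getDeclaredMethods()  ->  [create]
    // SubProtocol.class.getMethods()          ->  [create, renewLease]
    // so init(SubProtocol.class) now also registers rates for inherited methods.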
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
index ea1bde3a75e03..e07260c99936f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/GraphiteSink.java
@@ -21,6 +21,7 @@
import org.apache.commons.configuration2.SubsetConfiguration;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsException;
import org.apache.hadoop.metrics2.MetricsRecord;
@@ -37,171 +38,173 @@
import java.nio.charset.StandardCharsets;
/**
- * A metrics sink that writes to a Graphite server
+ * A metrics sink that writes to a Graphite server.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class GraphiteSink implements MetricsSink, Closeable {
- private static final Logger LOG =
- LoggerFactory.getLogger(GraphiteSink.class);
- private static final String SERVER_HOST_KEY = "server_host";
- private static final String SERVER_PORT_KEY = "server_port";
- private static final String METRICS_PREFIX = "metrics_prefix";
- private String metricsPrefix = null;
- private Graphite graphite = null;
-
- @Override
- public void init(SubsetConfiguration conf) {
- // Get Graphite host configurations.
- final String serverHost = conf.getString(SERVER_HOST_KEY);
- final int serverPort = Integer.parseInt(conf.getString(SERVER_PORT_KEY));
-
- // Get Graphite metrics graph prefix.
- metricsPrefix = conf.getString(METRICS_PREFIX);
- if (metricsPrefix == null)
- metricsPrefix = "";
-
- graphite = new Graphite(serverHost, serverPort);
- graphite.connect();
+ private static final Logger LOG =
+ LoggerFactory.getLogger(GraphiteSink.class);
+ private static final String SERVER_HOST_KEY = "server_host";
+ private static final String SERVER_PORT_KEY = "server_port";
+ private static final String METRICS_PREFIX = "metrics_prefix";
+ private String metricsPrefix = null;
+ private Graphite graphite = null;
+
+ @Override
+ public void init(SubsetConfiguration conf) {
+ // Get Graphite host configurations.
+ final String serverHost = conf.getString(SERVER_HOST_KEY);
+ final int serverPort = Integer.parseInt(conf.getString(SERVER_PORT_KEY));
+
+ // Get Graphite metrics graph prefix.
+ metricsPrefix = conf.getString(METRICS_PREFIX);
+ if (metricsPrefix == null) {
+ metricsPrefix = "";
}
- @Override
- public void putMetrics(MetricsRecord record) {
- StringBuilder lines = new StringBuilder();
- StringBuilder metricsPathPrefix = new StringBuilder();
-
- // Configure the hierarchical place to display the graph.
- metricsPathPrefix.append(metricsPrefix).append(".")
- .append(record.context()).append(".").append(record.name());
-
- for (MetricsTag tag : record.tags()) {
- if (tag.value() != null) {
- metricsPathPrefix.append(".")
- .append(tag.name())
- .append("=")
- .append(tag.value());
- }
- }
-
- // The record timestamp is in milliseconds while Graphite expects an epoc time in seconds.
- long timestamp = record.timestamp() / 1000L;
+ graphite = new Graphite(serverHost, serverPort);
+ graphite.connect();
+ }
+
+ @Override
+ public void putMetrics(MetricsRecord record) {
+ StringBuilder lines = new StringBuilder();
+ StringBuilder metricsPathPrefix = new StringBuilder();
+
+ // Configure the hierarchical place to display the graph.
+ metricsPathPrefix.append(metricsPrefix).append(".")
+ .append(record.context()).append(".").append(record.name());
+
+ for (MetricsTag tag : record.tags()) {
+ if (tag.value() != null) {
+ metricsPathPrefix.append(".")
+ .append(tag.name())
+ .append("=")
+ .append(tag.value());
+ }
+ }
- // Collect datapoints.
- for (AbstractMetric metric : record.metrics()) {
- lines.append(
- metricsPathPrefix.toString() + "."
- + metric.name().replace(' ', '.')).append(" ")
- .append(metric.value()).append(" ").append(timestamp)
- .append("\n");
- }
+ // The record timestamp is in milliseconds while Graphite expects an epoc time in seconds.
+ long timestamp = record.timestamp() / 1000L;
- try {
- graphite.write(lines.toString());
- } catch (Exception e) {
- LOG.warn("Error sending metrics to Graphite", e);
- try {
- graphite.close();
- } catch (Exception e1) {
- throw new MetricsException("Error closing connection to Graphite", e1);
- }
- }
+ // Collect datapoints.
+ for (AbstractMetric metric : record.metrics()) {
+ lines.append(metricsPathPrefix + "." + metric.name().replace(' ', '.')).append(" ")
+ .append(metric.value()).append(" ").append(timestamp)
+ .append("\n");
}
- @Override
- public void flush() {
+ try {
+ graphite.write(lines.toString());
+ } catch (Exception e) {
+ LOG.warn("Error sending metrics to Graphite.", e);
try {
- graphite.flush();
- } catch (Exception e) {
- LOG.warn("Error flushing metrics to Graphite", e);
- try {
- graphite.close();
- } catch (Exception e1) {
- throw new MetricsException("Error closing connection to Graphite", e1);
- }
+ graphite.close();
+ } catch (Exception e1) {
+ throw new MetricsException("Error closing connection to Graphite", e1);
}
}
-
- @Override
- public void close() throws IOException {
- graphite.close();
+ }
+
+ @Override
+ public void flush() {
+ try {
+ graphite.flush();
+ } catch (Exception e) {
+ LOG.warn("Error flushing metrics to Graphite.", e);
+ try {
+ graphite.close();
+ } catch (Exception e1) {
+ throw new MetricsException("Error closing connection to Graphite.", e1);
+ }
}
+ }
- public static class Graphite {
- private final static int MAX_CONNECTION_FAILURES = 5;
+ @Override
+ public void close() throws IOException {
+ graphite.close();
+ }
- private String serverHost;
- private int serverPort;
- private Writer writer = null;
- private Socket socket = null;
- private int connectionFailures = 0;
+ public static class Graphite {
+ private final static int MAX_CONNECTION_FAILURES = 5;
- public Graphite(String serverHost, int serverPort) {
- this.serverHost = serverHost;
- this.serverPort = serverPort;
- }
+ private String serverHost;
+ private int serverPort;
+ private Writer writer = null;
+ private Socket socket = null;
+ private int connectionFailures = 0;
- public void connect() {
- if (isConnected()) {
- throw new MetricsException("Already connected to Graphite");
- }
- if (tooManyConnectionFailures()) {
- // return silently (there was ERROR in logs when we reached limit for the first time)
- return;
- }
- try {
+ public Graphite(String serverHost, int serverPort) {
+ this.serverHost = serverHost;
+ this.serverPort = serverPort;
+ }
+
+ public void connect() {
+ if (isConnected()) {
+ throw new MetricsException("Already connected to Graphite");
+ }
+ if (tooManyConnectionFailures()) {
+ // return silently (there was ERROR in logs when we reached limit for the first time)
+ return;
+ }
+ try {
// Open a connection to Graphite server.
- socket = new Socket(serverHost, serverPort);
+ socket = new Socket(serverHost, serverPort);
writer = new OutputStreamWriter(socket.getOutputStream(),
StandardCharsets.UTF_8);
- } catch (Exception e) {
- connectionFailures++;
- if (tooManyConnectionFailures()) {
- // first time when connection limit reached, report to logs
- LOG.error("Too many connection failures, would not try to connect again.");
- }
- throw new MetricsException("Error creating connection, "
- + serverHost + ":" + serverPort, e);
+ } catch (Exception e) {
+ connectionFailures++;
+ if (tooManyConnectionFailures()) {
+ // first time when connection limit reached, report to logs
+ LOG.error("Too many connection failures, would not try to connect again.");
}
+ throw new MetricsException("Error creating connection, " +
+ serverHost + ":" + serverPort, e);
}
+ }
- public void write(String msg) throws IOException {
- if (!isConnected()) {
- connect();
- }
- if (isConnected()) {
- writer.write(msg);
- }
+ public void write(String msg) throws IOException {
+ if (!isConnected()) {
+ connect();
}
-
- public void flush() throws IOException {
- if (isConnected()) {
- writer.flush();
- }
+ if (isConnected()) {
+ writer.write(msg);
}
+ }
- public boolean isConnected() {
- return socket != null && socket.isConnected() && !socket.isClosed();
+ public void flush() throws IOException {
+ if (isConnected()) {
+ writer.flush();
}
+ }
- public void close() throws IOException {
- try {
- if (writer != null) {
- writer.close();
- }
- } catch (IOException ex) {
- if (socket != null) {
- socket.close();
- }
- } finally {
- socket = null;
- writer = null;
- }
- }
+ public boolean isConnected() {
+ return socket != null && socket.isConnected() && !socket.isClosed();
+ }
- private boolean tooManyConnectionFailures() {
- return connectionFailures > MAX_CONNECTION_FAILURES;
+ public void close() throws IOException {
+ try {
+ if (writer != null) {
+ writer.close();
+ }
+ } catch (IOException ex) {
+ if (socket != null) {
+ socket.close();
+ }
+ } finally {
+ socket = null;
+ writer = null;
}
+ }
+ private boolean tooManyConnectionFailures() {
+ return connectionFailures > MAX_CONNECTION_FAILURES;
}
+ }
+ @VisibleForTesting
+ void setGraphite(Graphite graphite) {
+ this.graphite = graphite;
+ }
}
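A hedged fragment of the kind of same-package test the new setter enables, substituting a hand-rolled fake for a live Graphite connection; the MetricsRecord fixture is assumed:

    GraphiteSink sink = new GraphiteSink();
    final StringBuilder captured = new StringBuilder();
    sink.setGraphite(new GraphiteSink.Graphite("localhost", 2003) {
      @Override public void connect() { /* no socket in tests */ }
      @Override public boolean isConnected() { return true; }
      @Override public void write(String msg) { captured.append(msg); }
    });
    sink.putMetrics(record);           // 'record' is an assumed MetricsRecord fixture
    // 'captured' now holds the lines the sink would have written to Graphite.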
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/StatsDSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/StatsDSink.java
index d1ec47fdecb31..4f41c0b0057ce 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/StatsDSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/StatsDSink.java
@@ -28,6 +28,7 @@
import org.apache.commons.configuration2.SubsetConfiguration;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricType;
import org.apache.hadoop.metrics2.MetricsException;
@@ -214,5 +215,8 @@ public void close() throws IOException {
}
}
-
+ @VisibleForTesting
+ void setStatsd(StatsD statsd) {
+ this.statsd = statsd;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
index d3d794fa74a91..620f2f2faa679 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/sink/ganglia/AbstractGangliaSink.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import java.net.*;
import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -78,6 +79,10 @@ public abstract class AbstractGangliaSink implements MetricsSink {
private int offset;
private boolean supportSparseMetrics = SUPPORT_SPARSE_METRICS_DEFAULT;
+ public List<? extends SocketAddress> getMetricsServers() {
+ return metricsServers;
+ }
+
/**
* Used for visiting Metrics
*/
@@ -133,8 +138,11 @@ public void init(SubsetConfiguration conf) {
}
// load the gannglia servers from properties
- metricsServers = Servers.parse(conf.getString(SERVERS_PROPERTY),
- DEFAULT_PORT);
+ List<String> serversFromConf =
+ conf.getList(String.class, SERVERS_PROPERTY, new ArrayList<String>());
+ metricsServers =
+ Servers.parse(serversFromConf.size() > 0 ? String.join(",", serversFromConf) : null,
+ DEFAULT_PORT);
multicastEnabled = conf.getBoolean(MULTICAST_ENABLED_PROPERTY,
DEFAULT_MULTICAST_ENABLED);
multicastTtl = conf.getInt(MULTICAST_TTL_PROPERTY, DEFAULT_MULTICAST_TTL);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
index 23abfc4bedc31..dec7033424fbe 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java
@@ -27,33 +27,29 @@
public class SampleStat {
private final MinMax minmax = new MinMax();
private long numSamples = 0;
- private double a0, a1, s0, s1, total;
+ private double mean, s;
/**
* Construct a new running sample stat
*/
public SampleStat() {
- a0 = s0 = 0.0;
- total = 0.0;
+ mean = 0.0;
+ s = 0.0;
}
public void reset() {
numSamples = 0;
- a0 = s0 = 0.0;
- total = 0.0;
+ mean = 0.0;
+ s = 0.0;
minmax.reset();
}
// We want to reuse the object, sometimes.
- void reset(long numSamples, double a0, double a1, double s0, double s1,
- double total, MinMax minmax) {
- this.numSamples = numSamples;
- this.a0 = a0;
- this.a1 = a1;
- this.s0 = s0;
- this.s1 = s1;
- this.total = total;
- this.minmax.reset(minmax);
+ void reset(long numSamples1, double mean1, double s1, MinMax minmax1) {
+ numSamples = numSamples1;
+ mean = mean1;
+ s = s1;
+ minmax.reset(minmax1);
}
/**
@@ -61,7 +57,7 @@ void reset(long numSamples, double a0, double a1, double s0, double s1,
* @param other the destination to hold our values
*/
public void copyTo(SampleStat other) {
- other.reset(numSamples, a0, a1, s0, s1, total, minmax);
+ other.reset(numSamples, mean, s, minmax);
}
/**
@@ -78,24 +74,22 @@ public SampleStat add(double x) {
* Add some sample and a partial sum to the running stat.
* Note, min/max is not evaluated using this method.
* @param nSamples number of samples
- * @param x the partial sum
+ * @param xTotal the partial sum
* @return self
*/
- public SampleStat add(long nSamples, double x) {
+ public SampleStat add(long nSamples, double xTotal) {
numSamples += nSamples;
- total += x;
- if (numSamples == 1) {
- a0 = a1 = x;
- s0 = 0.0;
- }
- else {
- // The Welford method for numerical stability
- a1 = a0 + (x - a0) / numSamples;
- s1 = s0 + (x - a0) * (x - a1);
- a0 = a1;
- s0 = s1;
- }
+ // use the weighted incremental version of Welford's algorithm to get
+ // numerical stability while treating the samples as being weighted
+ // by nSamples
+ // see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+
+ double x = xTotal / nSamples;
+ double meanOld = mean;
+
+ mean += ((double) nSamples / numSamples) * (x - meanOld);
+ s += nSamples * (x - meanOld) * (x - mean);
return this;
}
@@ -110,21 +104,21 @@ public long numSamples() {
* @return the total of all samples added
*/
public double total() {
- return total;
+ return mean * numSamples;
}
/**
* @return the arithmetic mean of the samples
*/
public double mean() {
- return numSamples > 0 ? (total / numSamples) : 0.0;
+ return numSamples > 0 ? mean : 0.0;
}
/**
* @return the variance of the samples
*/
public double variance() {
- return numSamples > 1 ? s1 / (numSamples - 1) : 0.0;
+ return numSamples > 1 ? s / (numSamples - 1) : 0.0;
}
/**
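For reference, a minimal standalone sketch (not part of the patch; class and method names are illustrative) of the weighted incremental Welford update adopted above, checked against a tiny sample set:

```java
/** Minimal sketch of the weighted incremental Welford update used in SampleStat. */
public class WeightedWelfordSketch {
  private long numSamples;
  private double mean, s;

  /** Add a batch of nSamples whose values sum to xTotal (the batch is treated as its mean). */
  void add(long nSamples, double xTotal) {
    numSamples += nSamples;
    double x = xTotal / nSamples;   // mean of this batch
    double meanOld = mean;
    mean += ((double) nSamples / numSamples) * (x - meanOld);
    s += nSamples * (x - meanOld) * (x - mean);
  }

  double mean() {
    return numSamples > 0 ? mean : 0.0;
  }

  double variance() {
    return numSamples > 1 ? s / (numSamples - 1) : 0.0;
  }

  public static void main(String[] args) {
    WeightedWelfordSketch stat = new WeightedWelfordSketch();
    stat.add(1, 2.0);
    stat.add(1, 4.0);
    stat.add(1, 6.0);
    // For samples {2, 4, 6}: mean = 4.0, sample variance = 4.0
    System.out.printf("mean=%.1f variance=%.1f%n", stat.mean(), stat.variance());
  }
}
```

Note that when `nSamples > 1` only the partial sum is available, so the batch is effectively treated as `nSamples` copies of its mean, which is exactly what the patched `add(long, double)` does.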
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
index 5866e2960fe1b..ce962bf9e8c6a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNSDomainNameResolver.java
@@ -18,6 +18,10 @@
package org.apache.hadoop.net;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.naming.NamingException;
import java.net.InetAddress;
import java.net.UnknownHostException;
@@ -27,6 +31,10 @@
* fully qualified domain names belonging to the IPs from this host name
*/
public class DNSDomainNameResolver implements DomainNameResolver {
+
+ private final static Logger LOG =
+ LoggerFactory.getLogger(DNSDomainNameResolver.class.getName());
+
@Override
public InetAddress[] getAllByDomainName(String domainName)
throws UnknownHostException {
@@ -40,6 +48,16 @@ public String getHostnameByIP(InetAddress address) {
&& host.charAt(host.length()-1) == '.') {
host = host.substring(0, host.length()-1);
}
+ // Protect against the Java behaviour of returning the IP address as a string from a cache
+ // instead of performing a reverse lookup.
+ if (host != null && host.equals(address.getHostAddress())) {
+ LOG.debug("IP address returned for FQDN detected: {}", address.getHostAddress());
+ try {
+ return DNS.reverseDns(address, null);
+ } catch (NamingException lookupFailure) {
+ LOG.warn("Failed to perform reverse lookup: {}", address);
+ }
+ }
return host;
}
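A self-contained sketch of the same fallback pattern; the address is illustrative, the use of `getCanonicalHostName()` as the initial resolution call is an assumption, and `DNS.reverseDns` is the Hadoop utility the patch itself calls:

```java
import java.net.InetAddress;
import javax.naming.NamingException;
import org.apache.hadoop.net.DNS;

public class ReverseLookupSketch {
  public static void main(String[] args) throws Exception {
    InetAddress address = InetAddress.getByName("192.0.2.10");  // illustrative address
    String host = address.getCanonicalHostName();
    // If the JDK hands back the literal IP (e.g. from its lookup cache) instead of an
    // FQDN, fall back to an explicit reverse DNS lookup, as the patch does.
    if (host.equals(address.getHostAddress())) {
      try {
        host = DNS.reverseDns(address, null);
      } catch (NamingException e) {
        // keep the literal address if the reverse lookup also fails
      }
    }
    System.out.println(host);
  }
}
```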
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
index ebb354e7db3cb..50be1ab759f36 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
@@ -522,8 +522,7 @@ protected Node chooseRandom(final String scope, String excludedScope,
}
}
if (numOfDatanodes <= 0) {
- LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\")."
- + " numOfDatanodes={}",
+ LOG.debug("Failed to find datanode (scope=\"{}\" excludedScope=\"{}\"). numOfDatanodes={}",
scope, excludedScope, numOfDatanodes);
return null;
}
@@ -539,10 +538,12 @@ protected Node chooseRandom(final String scope, String excludedScope,
netlock.readLock().unlock();
}
}
- LOG.debug("Choosing random from {} available nodes on node {},"
- + " scope={}, excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
- availableNodes, innerNode, scope, excludedScope, excludedNodes,
- numOfDatanodes);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Choosing random from {} available nodes on node {}, scope={},"
+ + " excludedScope={}, excludeNodes={}. numOfDatanodes={}.",
+ availableNodes, innerNode, scope, excludedScope, excludedNodes,
+ numOfDatanodes);
+ }
Node ret = null;
if (availableNodes > 0) {
ret = chooseRandom(innerNode, node, excludedNodes, numOfDatanodes,
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
index 4eb3d865ec78e..f0ff5bd700877 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/LdapGroupsMapping.java
@@ -30,6 +30,7 @@
import java.security.GeneralSecurityException;
import java.security.KeyStore;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Hashtable;
import java.util.Iterator;
@@ -59,6 +60,7 @@
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.thirdparty.com.google.common.collect.Iterators;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -251,6 +253,10 @@ public class LdapGroupsMapping
public static final String POSIX_GID_ATTR_KEY = LDAP_CONFIG_PREFIX + ".posix.attr.gid.name";
public static final String POSIX_GID_ATTR_DEFAULT = "gidNumber";
+ public static final String GROUP_SEARCH_FILTER_PATTERN =
+ LDAP_CONFIG_PREFIX + ".group.search.filter.pattern";
+ public static final String GROUP_SEARCH_FILTER_PATTERN_DEFAULT = "";
+
/*
* Posix attributes
*/
@@ -336,6 +342,7 @@ public class LdapGroupsMapping
private int numAttempts;
private volatile int numAttemptsBeforeFailover;
private volatile String ldapCtxFactoryClassName;
+ private volatile String[] groupSearchFilterParams;
/**
* Returns list of groups for a user.
@@ -428,15 +435,22 @@ private NamingEnumeration<SearchResult> lookupPosixGroup(SearchResult result,
* @return a list of strings representing group names of the user.
* @throws NamingException if unable to find group names
*/
- private Set<String> lookupGroup(SearchResult result, DirContext c,
+ @VisibleForTesting
+ Set<String> lookupGroup(SearchResult result, DirContext c,
 int goUpHierarchy)
 throws NamingException {
 Set<String> groups = new LinkedHashSet<>();
 Set<String> groupDNs = new HashSet<>();
 NamingEnumeration<SearchResult> groupResults;
- // perform the second LDAP query
- if (isPosix) {
+
+ String[] resolved = resolveCustomGroupFilterArgs(result);
+ // If custom group filter arguments are supplied, use them.
+ if (resolved != null) {
+ groupResults =
+ c.search(groupbaseDN, groupSearchFilter, resolved, SEARCH_CONTROLS);
+ } else if (isPosix) {
+ // perform the second LDAP query
groupResults = lookupPosixGroup(result, c);
} else {
String userDn = result.getNameInNamespace();
@@ -460,6 +474,25 @@ private Set lookupGroup(SearchResult result, DirContext c,
return groups;
}
+ private String[] resolveCustomGroupFilterArgs(SearchResult result)
+ throws NamingException {
+ if (groupSearchFilterParams != null) {
+ String[] filterElems = new String[groupSearchFilterParams.length];
+ for (int i = 0; i < groupSearchFilterParams.length; i++) {
+ // Specific handling for userDN.
+ if (groupSearchFilterParams[i].equalsIgnoreCase("userDN")) {
+ filterElems[i] = result.getNameInNamespace();
+ } else {
+ filterElems[i] =
+ result.getAttributes().get(groupSearchFilterParams[i]).get()
+ .toString();
+ }
+ }
+ return filterElems;
+ }
+ return null;
+ }
+
/**
* Perform LDAP queries to get group names of a user.
*
@@ -510,6 +543,8 @@ Set<String> doGetGroups(String user, int goUpHierarchy)
}
} catch (NamingException e) {
// If the first lookup failed, fall back to the typical scenario.
+ // In order to force the fallback, we need to reset the groups collection.
+ groups.clear();
LOG.info("Failed to get groups from the first lookup. Initiating " +
"the second LDAP query using the user's DN.", e);
}
@@ -777,6 +812,12 @@ public synchronized void setConf(Configuration conf) {
conf.get(POSIX_UID_ATTR_KEY, POSIX_UID_ATTR_DEFAULT);
posixGidAttr =
conf.get(POSIX_GID_ATTR_KEY, POSIX_GID_ATTR_DEFAULT);
+ String groupSearchFilterParamCSV = conf.get(GROUP_SEARCH_FILTER_PATTERN,
+ GROUP_SEARCH_FILTER_PATTERN_DEFAULT);
+    if (groupSearchFilterParamCSV != null && !groupSearchFilterParamCSV.isEmpty()) {
+ LOG.debug("Using custom group search filters: {}", groupSearchFilterParamCSV);
+ groupSearchFilterParams = groupSearchFilterParamCSV.split(",");
+ }
int dirSearchTimeout = conf.getInt(DIRECTORY_SEARCH_TIMEOUT,
DIRECTORY_SEARCH_TIMEOUT_DEFAULT);
@@ -791,7 +832,16 @@ public synchronized void setConf(Configuration conf) {
returningAttributes = new String[] {
groupNameAttr, posixUidAttr, posixGidAttr};
}
- SEARCH_CONTROLS.setReturningAttributes(returningAttributes);
+
+ // If custom group filter is being used, fetch attributes in the filter
+ // as well.
+    ArrayList<String> customAttributes = new ArrayList<>();
+ if (groupSearchFilterParams != null) {
+ customAttributes.addAll(Arrays.asList(groupSearchFilterParams));
+ }
+ customAttributes.addAll(Arrays.asList(returningAttributes));
+ SEARCH_CONTROLS
+ .setReturningAttributes(customAttributes.toArray(new String[0]));
// LDAP_CTX_FACTORY_CLASS_DEFAULT is not open to unnamed modules
// in Java 11+, so the default value is set to null to avoid
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java
index e5d62389abab7..ce7878480e22c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java
@@ -237,7 +237,14 @@ private SaslClient createSaslClient(SaslAuth authType)
LOG.debug("client isn't using kerberos");
return null;
}
- String serverPrincipal = getServerPrincipal(authType);
+ final String serverPrincipal;
+ try {
+ serverPrincipal = getServerPrincipal(authType);
+ } catch (IllegalArgumentException ex) {
+ // YARN-11210: getServerPrincipal can throw IllegalArgumentException if Kerberos
+ // configuration is bad, this is surfaced as a non-retryable SaslException
+ throw new SaslException("Bad Kerberos server principal configuration", ex);
+ }
if (serverPrincipal == null) {
LOG.debug("protocol doesn't use kerberos");
return null;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
index 2b9822a3d4817..3369869bde24d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
@@ -44,6 +44,8 @@
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.net.DNS;
+import org.apache.hadoop.net.DomainNameResolver;
+import org.apache.hadoop.net.DomainNameResolverFactory;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.token.Token;
@@ -81,6 +83,8 @@ private SecurityUtil() {
@VisibleForTesting
static HostResolver hostResolver;
+ private static DomainNameResolver domainNameResolver;
+
private static boolean logSlowLookups;
private static int slowLookupThresholdMs;
@@ -112,6 +116,9 @@ private static void setConfigurationInternal(Configuration conf) {
.HADOOP_SECURITY_DNS_LOG_SLOW_LOOKUPS_THRESHOLD_MS_KEY,
CommonConfigurationKeys
.HADOOP_SECURITY_DNS_LOG_SLOW_LOOKUPS_THRESHOLD_MS_DEFAULT);
+
+ domainNameResolver = DomainNameResolverFactory.newInstance(conf,
+ CommonConfigurationKeys.HADOOP_SECURITY_RESOLVER_IMPL);
}
/**
@@ -212,7 +219,7 @@ public static String getServerPrincipal(String principalConfig,
throw new IOException("Can't replace " + HOSTNAME_PATTERN
+ " pattern since client address is null");
}
- return replacePattern(components, addr.getCanonicalHostName());
+ return replacePattern(components, domainNameResolver.getHostnameByIP(addr));
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java
index 520047b3a0414..4fbb3c8aea6fc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/FileBasedKeyStoresFactory.java
@@ -319,8 +319,10 @@ String getPassword(Configuration conf, String alias, String defaultPass) {
*/
@Override
public synchronized void destroy() {
- if (trustManager != null) {
+ if (fileMonitoringTimer != null) {
fileMonitoringTimer.cancel();
+ }
+ if (trustManager != null) {
trustManager = null;
keyManagers = null;
trustManagers = null;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java
index d0630e38b4ddb..fb9a2951f598a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/ZKDelegationTokenSecretManager.java
@@ -42,7 +42,6 @@
import org.apache.curator.framework.recipes.shared.SharedCount;
import org.apache.curator.framework.recipes.shared.VersionedValue;
import org.apache.curator.retry.RetryNTimes;
-import org.apache.curator.utils.EnsurePath;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
@@ -60,6 +59,7 @@
import org.apache.zookeeper.client.ZKClientConfig;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Id;
+import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -134,6 +134,11 @@ public static void setCurator(CuratorFramework curator) {
CURATOR_TL.set(curator);
}
+ @VisibleForTesting
+ protected static CuratorFramework getCurator() {
+ return CURATOR_TL.get();
+ }
+
private final boolean isExternalClient;
protected final CuratorFramework zkClient;
private SharedCount delTokSeqCounter;
@@ -260,10 +265,12 @@ public void startThreads() throws IOException {
// If namespace parents are implicitly created, they won't have ACLs.
// So, let's explicitly create them.
CuratorFramework nullNsFw = zkClient.usingNamespace(null);
- EnsurePath ensureNs =
- nullNsFw.newNamespaceAwareEnsurePath("/" + zkClient.getNamespace());
try {
- ensureNs.ensure(nullNsFw.getZookeeperClient());
+ String nameSpace = "/" + zkClient.getNamespace();
+ Stat stat = nullNsFw.checkExists().forPath(nameSpace);
+ if (stat == null) {
+ nullNsFw.create().creatingParentContainersIfNeeded().forPath(nameSpace);
+ }
} catch (Exception e) {
throw new IOException("Could not create namespace", e);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java
index 32f4b5b7a72d4..dd47aeeefac2c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ExitUtil.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.util;
+import java.util.concurrent.atomic.AtomicReference;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.slf4j.Logger;
@@ -36,8 +38,10 @@ public final class ExitUtil {
LOG = LoggerFactory.getLogger(ExitUtil.class.getName());
private static volatile boolean systemExitDisabled = false;
private static volatile boolean systemHaltDisabled = false;
- private static volatile ExitException firstExitException;
- private static volatile HaltException firstHaltException;
+  private static final AtomicReference<ExitException> FIRST_EXIT_EXCEPTION =
+      new AtomicReference<>();
+  private static final AtomicReference<HaltException> FIRST_HALT_EXCEPTION =
+      new AtomicReference<>();
/** Message raised from an exit exception if none were provided: {@value}. */
public static final String EXIT_EXCEPTION_MESSAGE = "ExitException";
/** Message raised from a halt exception if none were provided: {@value}. */
@@ -159,28 +163,29 @@ public static void disableSystemHalt() {
*/
public static boolean terminateCalled() {
// Either we set this member or we actually called System#exit
- return firstExitException != null;
+ return FIRST_EXIT_EXCEPTION.get() != null;
}
/**
* @return true if halt has been called.
*/
public static boolean haltCalled() {
- return firstHaltException != null;
+ // Either we set this member or we actually called Runtime#halt
+ return FIRST_HALT_EXCEPTION.get() != null;
}
/**
- * @return the first ExitException thrown, null if none thrown yet.
+ * @return the first {@code ExitException} thrown, null if none thrown yet.
*/
public static ExitException getFirstExitException() {
- return firstExitException;
+ return FIRST_EXIT_EXCEPTION.get();
}
/**
* @return the first {@code HaltException} thrown, null if none thrown yet.
*/
public static HaltException getFirstHaltException() {
- return firstHaltException;
+ return FIRST_HALT_EXCEPTION.get();
}
/**
@@ -188,63 +193,136 @@ public static HaltException getFirstHaltException() {
* where one test in the suite expects an exit but others do not.
*/
public static void resetFirstExitException() {
- firstExitException = null;
+ FIRST_EXIT_EXCEPTION.set(null);
}
+ /**
+ * Reset the tracking of process termination. This is for use in unit tests
+ * where one test in the suite expects a halt but others do not.
+ */
public static void resetFirstHaltException() {
- firstHaltException = null;
+ FIRST_HALT_EXCEPTION.set(null);
}
/**
+   * Adds the second throwable as suppressed by the first, when legitimate, and
+   * returns the first non-null of the two. Suppression is only applied when the
+   * suppressor is neither null nor the same object as the suppressed throwable.
+   * @param <T> type of the throwables
+   * @param suppressor Throwable that suppresses suppressed
+   * @param suppressed Throwable that is suppressed by suppressor
+   * @return suppressor if not null, suppressed otherwise
+   */
+  private static <T extends Throwable> T addSuppressed(T suppressor, T suppressed) {
+ if (suppressor == null) {
+ return suppressed;
+ }
+ if (suppressor != suppressed) {
+ suppressor.addSuppressed(suppressed);
+ }
+ return suppressor;
+ }
+
+ /**
+   * Exits the JVM if exit is enabled; otherwise rethrows the provided exception or any raised error.
   * Inner termination: either exit with the exception's exit code,
   * or, if system exits are disabled, rethrow the exception.
   * @param ee exit exception
+   * @throws ExitException if {@link System#exit(int)} is disabled and not suppressed by an Error
+   * @throws Error if {@link System#exit(int)} is disabled and an Error arises, suppressing
+   * anything else, even ee
*/
- public static synchronized void terminate(ExitException ee)
- throws ExitException {
- int status = ee.getExitCode();
- String msg = ee.getMessage();
+ public static void terminate(final ExitException ee) throws ExitException {
+ final int status = ee.getExitCode();
+ Error caught = null;
if (status != 0) {
- //exit indicates a problem, log it
- LOG.debug("Exiting with status {}: {}", status, msg, ee);
- LOG.info("Exiting with status {}: {}", status, msg);
+ try {
+ // exit indicates a problem, log it
+ String msg = ee.getMessage();
+ LOG.debug("Exiting with status {}: {}", status, msg, ee);
+ LOG.info("Exiting with status {}: {}", status, msg);
+ } catch (Error e) {
+        // Errors have higher priority than the ExitException; they may be re-thrown.
+        // OOM and ThreadDeath are two examples of Errors to re-throw.
+ caught = e;
+ } catch (Throwable t) {
+ // all other kind of throwables are suppressed
+ addSuppressed(ee, t);
+ }
}
if (systemExitDisabled) {
- LOG.error("Terminate called", ee);
- if (!terminateCalled()) {
- firstExitException = ee;
+ try {
+ LOG.error("Terminate called", ee);
+ } catch (Error e) {
+        // errors have higher priority again; if this is a second error, the first one suppresses it
+ caught = addSuppressed(caught, e);
+ } catch (Throwable t) {
+ // all other kind of throwables are suppressed
+ addSuppressed(ee, t);
}
+ FIRST_EXIT_EXCEPTION.compareAndSet(null, ee);
+ if (caught != null) {
+ caught.addSuppressed(ee);
+ throw caught;
+ }
+      // not suppressed by a higher priority error
throw ee;
+ } else {
+ // when exit is enabled, whatever Throwable happened, we exit the VM
+ System.exit(status);
}
- System.exit(status);
}
/**
- * Forcibly terminates the currently running Java virtual machine.
- * The exception argument is rethrown if JVM halting is disabled.
- * @param ee the exception containing the status code, message and any stack
+   * Halts the JVM if halt is enabled; otherwise rethrows the provided exception or any raised error.
+   * If halt is disabled, this method throws the exception argument if no
+   * error arises, or the first error if at least one arises, suppressing he.
+   * If halt is enabled, all throwables are caught, even errors.
+   *
+   * @param he the exception containing the status code, message and any stack
   * trace.
-   * @throws HaltException if {@link Runtime#halt(int)} is disabled.
+   * @throws HaltException if {@link Runtime#halt(int)} is disabled and not suppressed by an Error
+   * @throws Error if {@link Runtime#halt(int)} is disabled and an Error arises, suppressing
+   * anything else, even he
*/
- public static synchronized void halt(HaltException ee) throws HaltException {
- int status = ee.getExitCode();
- String msg = ee.getMessage();
- try {
- if (status != 0) {
- //exit indicates a problem, log it
- LOG.info("Halt with status {}: {}", status, msg, ee);
+ public static void halt(final HaltException he) throws HaltException {
+ final int status = he.getExitCode();
+ Error caught = null;
+ if (status != 0) {
+ try {
+ // exit indicates a problem, log it
+ String msg = he.getMessage();
+ LOG.info("Halt with status {}: {}", status, msg, he);
+ } catch (Error e) {
+        // Errors have higher priority than the HaltException; they may be re-thrown.
+        // OOM and ThreadDeath are two examples of Errors to re-throw.
+ caught = e;
+ } catch (Throwable t) {
+ // all other kind of throwables are suppressed
+ addSuppressed(he, t);
}
- } catch (Exception ignored) {
- // ignore exceptions here, as it may be due to an out of memory situation
}
+    // systemHaltDisabled is volatile and not used in a scenario needing atomicity,
+    // thus it needs neither synchronized nor atomic access
if (systemHaltDisabled) {
- LOG.error("Halt called", ee);
- if (!haltCalled()) {
- firstHaltException = ee;
+ try {
+ LOG.error("Halt called", he);
+ } catch (Error e) {
+        // errors have higher priority again; if this is a second error, the first one suppresses it
+ caught = addSuppressed(caught, e);
+ } catch (Throwable t) {
+ // all other kind of throwables are suppressed
+ addSuppressed(he, t);
}
- throw ee;
+ FIRST_HALT_EXCEPTION.compareAndSet(null, he);
+ if (caught != null) {
+ caught.addSuppressed(he);
+ throw caught;
+ }
+      // not suppressed by a higher priority error
+ throw he;
+ } else {
+ // when halt is enabled, whatever Throwable happened, we halt the VM
+ Runtime.getRuntime().halt(status);
}
- Runtime.getRuntime().halt(status);
}
/**
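A test-style sketch of how the reworked bookkeeping is typically exercised; the `ExitException(int, String)` constructor arguments are an assumption, while every other call appears in this patch:

```java
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ExitUtil.ExitException;

public class ExitUtilSketch {
  public static void main(String[] args) {
    ExitUtil.disableSystemExit();      // tests disable the real System.exit
    try {
      ExitUtil.terminate(new ExitException(1, "simulated failure"));
    } catch (ExitException e) {
      // with exits disabled, terminate() rethrows instead of exiting
    }
    // the first ExitException is now recorded via an AtomicReference,
    // so concurrent callers cannot overwrite it
    System.out.println(ExitUtil.terminateCalled());                     // true
    System.out.println(ExitUtil.getFirstExitException().getExitCode()); // 1
    ExitUtil.resetFirstExitException();  // reset between tests
  }
}
```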
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
index bd6bcb08d9c6d..65978f3c5f59c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java
@@ -60,7 +60,7 @@ public abstract class Shell {
* {@value}
*/
private static final String WINDOWS_PROBLEMS =
- "https://wiki.apache.org/hadoop/WindowsProblems";
+ "https://cwiki.apache.org/confluence/display/HADOOP2/WindowsProblems";
/**
* Name of the windows utils binary: {@value}.
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java
new file mode 100644
index 0000000000000..f0cd5c08c572b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Function4RaisingIOE.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.util.functional;
+
+import java.io.IOException;
+
+/**
+ * Function of arity 4 which may raise an IOException.
+ * @param <I1> type of arg1.
+ * @param <I2> type of arg2.
+ * @param <I3> type of arg3.
+ * @param <I4> type of arg4.
+ * @param <R> return type.
+ */
+public interface Function4RaisingIOE<I1, I2, I3, I4, R> {
+
+ /**
+ * Apply the function.
+ * @param i1 argument 1.
+ * @param i2 argument 2.
+ * @param i3 argument 3.
+ * @param i4 argument 4.
+ * @return return value.
+ * @throws IOException any IOE.
+ */
+ R apply(I1 i1, I2 i2, I3 i3, I4 i4) throws IOException;
+}
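A hypothetical usage sketch of the new interface; the lambda body is purely illustrative:

```java
import java.io.IOException;
import org.apache.hadoop.util.functional.Function4RaisingIOE;

public class Function4Sketch {
  public static void main(String[] args) throws IOException {
    // Bind the four-argument function to a lambda; the body may raise IOException.
    Function4RaisingIOE<String, Integer, Integer, Boolean, String> slice =
        (text, start, end, upper) -> {
          if (start < 0 || end > text.length()) {
            throw new IOException("range out of bounds");
          }
          String s = text.substring(start, end);
          return upper ? s.toUpperCase() : s;
        };
    System.out.println(slice.apply("hadoop", 0, 3, true));  // HAD
  }
}
```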
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java
index 0abaab211de04..c9e6d0b78ac11 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/TaskPool.java
@@ -37,6 +37,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.util.functional.RemoteIterators.remoteIteratorFromIterable;
@@ -136,6 +137,15 @@ public static class Builder {
private boolean stopAbortsOnFailure = false;
private int sleepInterval = SLEEP_INTERVAL_AWAITING_COMPLETION;
+ /**
+ * IOStatisticsContext to switch to in all threads
+ * taking part in the commit operation.
+ * This ensures that the IOStatistics collected in the
+ * worker threads will be aggregated into the total statistics
+ * of the thread calling the committer commit/abort methods.
+ */
+ private IOStatisticsContext ioStatisticsContext = null;
+
/**
* Create the builder.
* @param items items to process
@@ -242,7 +252,7 @@ public Builder stopAbortsOnFailure() {
* @param value new value
* @return the builder
*/
- public Builder sleepInterval(final int value) {
+ public Builder sleepInterval(final int value) {
sleepInterval = value;
return this;
}
@@ -364,6 +374,8 @@ private boolean runSingleThreaded(Task task)
/**
* Parallel execution.
+ * All tasks run within the same IOStatisticsContext as the
+ * thread calling this method.
* @param task task to execute
 * @param <E> exception which may be raised in execution.
* @return true if the operation executed successfully
@@ -379,64 +391,70 @@ private boolean runParallel(final Task task)
final AtomicBoolean revertFailed = new AtomicBoolean(false);
 List<Future<?>> futures = new ArrayList<>();
+ ioStatisticsContext = IOStatisticsContext.getCurrentIOStatisticsContext();
 IOException iteratorIOE = null;
 final RemoteIterator<I> iterator = this.items;
try {
- while(iterator.hasNext()) {
+ while (iterator.hasNext()) {
final I item = iterator.next();
// submit a task for each item that will either run or abort the task
futures.add(service.submit(() -> {
- if (!(stopOnFailure && taskFailed.get())) {
- // run the task
- boolean threw = true;
- try {
- LOG.debug("Executing task");
- task.run(item);
- succeeded.add(item);
- LOG.debug("Task succeeded");
-
- threw = false;
-
- } catch (Exception e) {
- taskFailed.set(true);
- exceptions.add(e);
- LOG.info("Task failed {}", e.toString());
- LOG.debug("Task failed", e);
-
- if (onFailure != null) {
- try {
- onFailure.run(item, e);
- } catch (Exception failException) {
- LOG.warn("Failed to clean up on failure", e);
- // swallow the exception
- }
- }
- } finally {
- if (threw) {
+ setStatisticsContext();
+ try {
+ if (!(stopOnFailure && taskFailed.get())) {
+ // prepare and run the task
+ boolean threw = true;
+ try {
+ LOG.debug("Executing task");
+ task.run(item);
+ succeeded.add(item);
+ LOG.debug("Task succeeded");
+
+ threw = false;
+
+ } catch (Exception e) {
taskFailed.set(true);
+ exceptions.add(e);
+ LOG.info("Task failed {}", e.toString());
+ LOG.debug("Task failed", e);
+
+ if (onFailure != null) {
+ try {
+ onFailure.run(item, e);
+ } catch (Exception failException) {
+ LOG.warn("Failed to clean up on failure", e);
+ // swallow the exception
+ }
+ }
+ } finally {
+ if (threw) {
+ taskFailed.set(true);
+ }
}
- }
- } else if (abortTask != null) {
- // abort the task instead of running it
- if (stopAbortsOnFailure && abortFailed.get()) {
- return;
- }
+ } else if (abortTask != null) {
+ // abort the task instead of running it
+ if (stopAbortsOnFailure && abortFailed.get()) {
+ return;
+ }
- boolean failed = true;
- try {
- LOG.info("Aborting task");
- abortTask.run(item);
- failed = false;
- } catch (Exception e) {
- LOG.error("Failed to abort task", e);
- // swallow the exception
- } finally {
- if (failed) {
- abortFailed.set(true);
+ boolean failed = true;
+ try {
+ LOG.info("Aborting task");
+ abortTask.run(item);
+ failed = false;
+ } catch (Exception e) {
+ LOG.error("Failed to abort task", e);
+ // swallow the exception
+ } finally {
+ if (failed) {
+ abortFailed.set(true);
+ }
}
}
+ } finally {
+ resetStatisticsContext();
}
}));
}
@@ -447,7 +465,6 @@ private boolean runParallel(final Task task)
// mark as a task failure so all submitted tasks will halt/abort
taskFailed.set(true);
}
-
// let the above tasks complete (or abort)
waitFor(futures, sleepInterval);
int futureCount = futures.size();
@@ -464,6 +481,7 @@ private boolean runParallel(final Task task)
}
boolean failed = true;
+ setStatisticsContext();
try {
revertTask.run(item);
failed = false;
@@ -474,6 +492,7 @@ private boolean runParallel(final Task task)
if (failed) {
revertFailed.set(true);
}
+ resetStatisticsContext();
}
}));
}
@@ -498,6 +517,26 @@ private boolean runParallel(final Task task)
// return true if all tasks succeeded.
return !taskFailed.get();
}
+
+ /**
+ * Set the statistics context for this thread.
+ */
+ private void setStatisticsContext() {
+ if (ioStatisticsContext != null) {
+ IOStatisticsContext.setThreadIOStatisticsContext(ioStatisticsContext);
+ }
+ }
+
+ /**
+ * Reset the statistics context if it was set earlier.
+ * This unbinds the current thread from any statistics
+ * context.
+ */
+ private void resetStatisticsContext() {
+ if (ioStatisticsContext != null) {
+ IOStatisticsContext.setThreadIOStatisticsContext(null);
+ }
+ }
}
/**
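A reduced sketch of the propagation pattern this change introduces, using only the two `IOStatisticsContext` static calls that appear in the patch; the executor and task body are illustrative:

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.fs.statistics.IOStatisticsContext;

public class StatisticsContextPropagationSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(2);
    // capture the caller's context so worker-thread IO statistics aggregate into it
    final IOStatisticsContext callerContext =
        IOStatisticsContext.getCurrentIOStatisticsContext();
    pool.submit(() -> {
      IOStatisticsContext.setThreadIOStatisticsContext(callerContext);
      try {
        // ... task work whose IOStatistics should be credited to the caller ...
      } finally {
        // unbind so a pooled thread does not leak the context into later tasks
        IOStatisticsContext.setThreadIOStatisticsContext(null);
      }
    }).get();
    pool.shutdown();
  }
}
```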
diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto
index 042928c2aee18..d9becf722e982 100644
--- a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto
+++ b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto
@@ -91,6 +91,10 @@ message RpcRequestHeaderProto { // the header for the RpcRequest
optional RPCTraceInfoProto traceInfo = 6; // tracing info
optional RPCCallerContextProto callerContext = 7; // call context
optional int64 stateId = 8; // The last seen Global State ID
+ // Alignment context info for use with routers.
+ // The client should not interpret these bytes, but only forward bytes
+ // received from RpcResponseHeaderProto.routerFederatedState.
+ optional bytes routerFederatedState = 9;
}
@@ -157,6 +161,10 @@ message RpcResponseHeaderProto {
optional bytes clientId = 7; // Globally unique client ID
optional sint32 retryCount = 8 [default = -1];
optional int64 stateId = 9; // The last written Global State ID
+ // Alignment context info for use with routers.
+ // The client should not interpret these bytes, but only
+ // forward them to the router using RpcRequestHeaderProto.routerFederatedState.
+ optional bytes routerFederatedState = 10;
}
message RpcSaslProto {
diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 5a1c09f0141ed..17cd228dc1ba0 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -130,6 +130,14 @@
+<property>
+  <name>hadoop.security.resolver.impl</name>
+  <value>org.apache.hadoop.net.DNSDomainNameResolver</value>
+  <description>
+    The resolver implementation used to resolve FQDN for Kerberos
+  </description>
+</property>
+
  <name>hadoop.security.dns.log-slow-lookups.enabled</name>
  <value>false</value>
@@ -577,6 +585,18 @@
+<property>
+  <name>hadoop.security.group.mapping.ldap.group.search.filter.pattern</name>
+  <value></value>
+  <description>
+    Comma separated values that need to be substituted in the group search
+    filter during group lookup. The values are substituted in the order they
+    appear in the list; the first value will replace {0}, the second {1}, and
+    so on.
+  </description>
+</property>
+
  <name>hadoop.security.group.mapping.providers</name>
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/GroupsMapping.md b/hadoop-common-project/hadoop-common/src/site/markdown/GroupsMapping.md
index 03759d80092cd..cd6e6fecb1389 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/GroupsMapping.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/GroupsMapping.md
@@ -85,6 +85,14 @@ This is the limit for each ldap query. If `hadoop.security.group.mapping.ldap.s
`hadoop.security.group.mapping.ldap.base` configures how far to walk up the groups hierarchy when resolving groups.
By default, with a limit of 0, in order to be considered a member of a group, the user must be an explicit member in LDAP. Otherwise, it will traverse the group hierarchy `hadoop.security.group.mapping.ldap.search.group.hierarchy.levels` levels up.
+It is possible to use custom group search filters with different arguments via
+the configuration `hadoop.security.group.mapping.ldap.group.search.filter.pattern`. It takes a comma separated list of values; each value names an LDAP attribute
+of the user, and that attribute's value is substituted into the group search filter in the order it appears. For example, if the first entry is `uid`, the `uid`
+attribute is fetched and its value replaces `{0}` in the group search filter; the second value replaces `{1}`, and so on. A configuration sketch follows this note.
+
+Note: If `hadoop.security.group.mapping.ldap.group.search.filter.pattern` is configured, the group search is always done using this group
+search filter pattern, irrespective of any other parameters.
+
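For example, a sketch of the relevant settings, shown here as Java `Configuration` calls; the group search filter key `hadoop.security.group.mapping.ldap.search.filter.group` is the standard one, and the attribute names and filter are illustrative:

```java
import org.apache.hadoop.conf.Configuration;

public class LdapFilterPatternSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // group search filter with two placeholders to be substituted
    conf.set("hadoop.security.group.mapping.ldap.search.filter.group",
        "(&(objectClass=group)(member={0})(departmentNumber={1}))");
    // {0} is replaced by the user's DN (the special value userDN),
    // {1} by the user's departmentNumber attribute
    conf.set("hadoop.security.group.mapping.ldap.group.search.filter.pattern",
        "userDN,departmentNumber");
  }
}
```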
### Bind user(s) ###
If the LDAP server does not support anonymous binds,
set the distinguished name of the user to bind in `hadoop.security.group.mapping.ldap.bind.user`.
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
index 197b999c81f66..f64a2bd03b63b 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
@@ -454,6 +454,13 @@ Also, clients are encouraged to use `WeakReferencedElasticByteBufferPool` for
allocating buffers such that even direct buffers are garbage collected when
they are no longer referenced.
+The position returned by `getPos()` after `readVectored()` is undefined.
+
+If a file is changed while the `readVectored()` operation is in progress, the output is
+undefined. Some ranges may have old data, some may have new, and some may have both.
+
+While a `readVectored()` operation is in progress, normal read API calls may block.
+
Note: Don't use direct buffers for reading from ChecksumFileSystem as that may
lead to memory fragmentation explained in HADOOP-18296.
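A minimal sketch of a vectored read, assuming the `FileRange` / `PositionedReadable.readVectored` API this document describes; the path, offsets, and lengths are illustrative:

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadVectoredSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
      List<FileRange> ranges = Arrays.asList(
          FileRange.createFileRange(0, 1024),        // offset, length
          FileRange.createFileRange(8192, 1024));
      // ranges may complete in any order; getPos() is undefined afterwards
      in.readVectored(ranges, ByteBuffer::allocate);
      for (FileRange range : ranges) {
        ByteBuffer data = range.getData().get();     // block until this range is filled
        System.out.println("read " + data.remaining() + " bytes at " + range.getOffset());
      }
    }
  }
}
```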
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md
new file mode 100644
index 0000000000000..fc0079d1c9bd8
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/CHANGELOG.3.2.4.md
@@ -0,0 +1,213 @@
+
+
+# Apache Hadoop Changelog
+
+## Release 3.2.4 - 2022-07-12
+
+
+
+### NEW FEATURES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-16337](https://issues.apache.org/jira/browse/HDFS-16337) | Show start time of Datanode on Web | Minor | . | Tao Li | Tao Li |
+
+
+### IMPROVEMENTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-15075](https://issues.apache.org/jira/browse/HDFS-15075) | Remove process command timing from BPServiceActor | Major | . | Íñigo Goiri | Xiaoqiao He |
+| [HDFS-15150](https://issues.apache.org/jira/browse/HDFS-15150) | Introduce read write lock to Datanode | Major | datanode | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16175](https://issues.apache.org/jira/browse/HDFS-16175) | Improve the configurable value of Server #PURGE\_INTERVAL\_NANOS | Major | ipc | JiangHua Zhu | JiangHua Zhu |
+| [HDFS-16173](https://issues.apache.org/jira/browse/HDFS-16173) | Improve CopyCommands#Put#executor queue configurability | Major | fs | JiangHua Zhu | JiangHua Zhu |
+| [HADOOP-17897](https://issues.apache.org/jira/browse/HADOOP-17897) | Allow nested blocks in switch case in checkstyle settings | Minor | build | Masatake Iwasaki | Masatake Iwasaki |
+| [HADOOP-17857](https://issues.apache.org/jira/browse/HADOOP-17857) | Check real user ACLs in addition to proxied user ACLs | Major | . | Eric Payne | Eric Payne |
+| [HDFS-14997](https://issues.apache.org/jira/browse/HDFS-14997) | BPServiceActor processes commands from NameNode asynchronously | Major | datanode | Xiaoqiao He | Xiaoqiao He |
+| [HADOOP-17926](https://issues.apache.org/jira/browse/HADOOP-17926) | Maven-eclipse-plugin is no longer needed since Eclipse can import Maven projects by itself. | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda |
+| [YARN-10935](https://issues.apache.org/jira/browse/YARN-10935) | AM Total Queue Limit goes below per-user AM Limit if parent is full. | Major | capacity scheduler, capacityscheduler | Eric Payne | Eric Payne |
+| [HDFS-16241](https://issues.apache.org/jira/browse/HDFS-16241) | Standby close reconstruction thread | Major | . | zhanghuazong | zhanghuazong |
+| [YARN-1115](https://issues.apache.org/jira/browse/YARN-1115) | Provide optional means for a scheduler to check real user ACLs | Major | capacity scheduler, scheduler | Eric Payne | |
+| [HDFS-16279](https://issues.apache.org/jira/browse/HDFS-16279) | Print detail datanode info when process first storage report | Minor | . | Tao Li | Tao Li |
+| [HDFS-16294](https://issues.apache.org/jira/browse/HDFS-16294) | Remove invalid DataNode#CONFIG\_PROPERTY\_SIMULATED | Major | datanode | JiangHua Zhu | JiangHua Zhu |
+| [HDFS-16299](https://issues.apache.org/jira/browse/HDFS-16299) | Fix bug for TestDataNodeVolumeMetrics#verifyDataNodeVolumeMetrics | Minor | . | Tao Li | Tao Li |
+| [HDFS-16301](https://issues.apache.org/jira/browse/HDFS-16301) | Improve BenchmarkThroughput#SIZE naming standardization | Minor | benchmarks, test | JiangHua Zhu | JiangHua Zhu |
+| [YARN-10997](https://issues.apache.org/jira/browse/YARN-10997) | Revisit allocation and reservation logging | Major | . | Andras Gyori | Andras Gyori |
+| [HDFS-16315](https://issues.apache.org/jira/browse/HDFS-16315) | Add metrics related to Transfer and NativeCopy for DataNode | Major | . | Tao Li | Tao Li |
+| [HADOOP-17998](https://issues.apache.org/jira/browse/HADOOP-17998) | Allow get command to run with multi threads. | Major | fs | Chengwei Wang | Chengwei Wang |
+| [HDFS-16345](https://issues.apache.org/jira/browse/HDFS-16345) | Fix test cases fail in TestBlockStoragePolicy | Major | build | guophilipse | guophilipse |
+| [HADOOP-18035](https://issues.apache.org/jira/browse/HADOOP-18035) | Skip unit test failures to run all the unit tests | Major | build | Akira Ajisaka | Akira Ajisaka |
+| [HADOOP-18040](https://issues.apache.org/jira/browse/HADOOP-18040) | Use maven.test.failure.ignore instead of ignoreTestFailure | Major | build | Akira Ajisaka | Akira Ajisaka |
+| [HDFS-16352](https://issues.apache.org/jira/browse/HDFS-16352) | return the real datanode numBlocks in #getDatanodeStorageReport | Major | . | qinyuren | qinyuren |
+| [HDFS-16386](https://issues.apache.org/jira/browse/HDFS-16386) | Reduce DataNode load when FsDatasetAsyncDiskService is working | Major | datanode | JiangHua Zhu | JiangHua Zhu |
+| [HDFS-16391](https://issues.apache.org/jira/browse/HDFS-16391) | Avoid evaluation of LOG.debug statement in NameNodeHeartbeatService | Trivial | . | wangzhaohui | wangzhaohui |
+| [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | Improve RM system metrics publisher's performance by pushing events to timeline server in batch | Critical | resourcemanager, timelineserver | Hu Ziqian | Ashutosh Gupta |
+| [HDFS-16430](https://issues.apache.org/jira/browse/HDFS-16430) | Validate maximum blocks in EC group when adding an EC policy | Minor | ec, erasure-coding | daimin | daimin |
+| [HDFS-16403](https://issues.apache.org/jira/browse/HDFS-16403) | Improve FUSE IO performance by supporting FUSE parameter max\_background | Minor | fuse-dfs | daimin | daimin |
+| [HADOOP-18136](https://issues.apache.org/jira/browse/HADOOP-18136) | Verify FileUtils.unTar() handling of missing .tar files | Minor | test, util | Steve Loughran | Steve Loughran |
+| [HDFS-16529](https://issues.apache.org/jira/browse/HDFS-16529) | Remove unnecessary setObserverRead in TestConsistentReadsObserver | Trivial | test | wangzhaohui | wangzhaohui |
+| [HDFS-16530](https://issues.apache.org/jira/browse/HDFS-16530) | setReplication debug log creates a new string even if debug is disabled | Major | namenode | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16427](https://issues.apache.org/jira/browse/HDFS-16427) | Add debug log for BlockManager#chooseExcessRedundancyStriped | Minor | erasure-coding | Tao Li | Tao Li |
+| [HDFS-16389](https://issues.apache.org/jira/browse/HDFS-16389) | Improve NNThroughputBenchmark test mkdirs | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu |
+| [MAPREDUCE-7373](https://issues.apache.org/jira/browse/MAPREDUCE-7373) | Building MapReduce NativeTask fails on Fedora 34+ | Major | build, nativetask | Kengo Seki | Kengo Seki |
+| [HDFS-16355](https://issues.apache.org/jira/browse/HDFS-16355) | Improve the description of dfs.block.scanner.volume.bytes.per.second | Minor | documentation, hdfs | guophilipse | guophilipse |
+| [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | Replace log4j 1.x with reload4j | Major | . | Wei-Chiu Chuang | Wei-Chiu Chuang |
+| [HDFS-16501](https://issues.apache.org/jira/browse/HDFS-16501) | Print the exception when reporting a bad block | Major | datanode | qinyuren | qinyuren |
+| [YARN-11116](https://issues.apache.org/jira/browse/YARN-11116) | Migrate Times util from SimpleDateFormat to thread-safe DateTimeFormatter class | Minor | . | Jonathan Turner Eagles | Jonathan Turner Eagles |
+| [YARN-10080](https://issues.apache.org/jira/browse/YARN-10080) | Support show app id on localizer thread pool | Major | nodemanager | zhoukang | Ashutosh Gupta |
+| [HADOOP-18240](https://issues.apache.org/jira/browse/HADOOP-18240) | Upgrade Yetus to 0.14.0 | Major | build | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16585](https://issues.apache.org/jira/browse/HDFS-16585) | Add @VisibleForTesting in Dispatcher.java after HDFS-16268 | Trivial | . | Wei-Chiu Chuang | Ashutosh Gupta |
+| [HDFS-16610](https://issues.apache.org/jira/browse/HDFS-16610) | Make fsck read timeout configurable | Major | hdfs-client | Stephen O'Donnell | Stephen O'Donnell |
+
+
+### BUG FIXES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-13983](https://issues.apache.org/jira/browse/HDFS-13983) | TestOfflineImageViewer crashes in windows | Major | . | Vinayakumar B | Vinayakumar B |
+| [YARN-9744](https://issues.apache.org/jira/browse/YARN-9744) | RollingLevelDBTimelineStore.getEntityByTime fails with NPE | Major | timelineserver | Prabhu Joseph | Prabhu Joseph |
+| [HDFS-15113](https://issues.apache.org/jira/browse/HDFS-15113) | Missing IBR when NameNode restart if open processCommand async feature | Blocker | datanode | Xiaoqiao He | Xiaoqiao He |
+| [HADOOP-16985](https://issues.apache.org/jira/browse/HADOOP-16985) | Handle release package related issues | Major | . | Vinayakumar B | Vinayakumar B |
+| [HADOOP-17116](https://issues.apache.org/jira/browse/HADOOP-17116) | Skip Retry INFO logging on first failover from a proxy | Major | ha | Hanisha Koneru | Hanisha Koneru |
+| [HDFS-15651](https://issues.apache.org/jira/browse/HDFS-15651) | Client could not obtain block when DN CommandProcessingThread exit | Major | . | Yiqun Lin | Mingxiang Li |
+| [HDFS-15963](https://issues.apache.org/jira/browse/HDFS-15963) | Unreleased volume references cause an infinite loop | Critical | datanode | Shuyan Zhang | Shuyan Zhang |
+| [HDFS-14575](https://issues.apache.org/jira/browse/HDFS-14575) | LeaseRenewer#daemon threads leak in DFSClient | Major | . | Tao Yang | Renukaprasad C |
+| [HADOOP-17796](https://issues.apache.org/jira/browse/HADOOP-17796) | Upgrade jetty version to 9.4.43 | Major | . | Wei-Chiu Chuang | Renukaprasad C |
+| [HDFS-15175](https://issues.apache.org/jira/browse/HDFS-15175) | Multiple CloseOp shared block instance causes the standby namenode to crash when rolling editlog | Critical | . | Yicong Cai | Wan Chang |
+| [HDFS-16177](https://issues.apache.org/jira/browse/HDFS-16177) | Bug fix for Util#receiveFile | Minor | . | Tao Li | Tao Li |
+| [YARN-10814](https://issues.apache.org/jira/browse/YARN-10814) | YARN shouldn't start with empty hadoop.http.authentication.signature.secret.file | Major | . | Benjamin Teke | Tamas Domok |
+| [HADOOP-17874](https://issues.apache.org/jira/browse/HADOOP-17874) | ExceptionsHandler to add terse/suppressed Exceptions in thread-safe manner | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-15129](https://issues.apache.org/jira/browse/HADOOP-15129) | Datanode caches namenode DNS lookup failure and cannot startup | Minor | ipc | Karthik Palaniappan | Chris Nauroth |
+| [YARN-10901](https://issues.apache.org/jira/browse/YARN-10901) | Permission checking error on an existing directory in LogAggregationFileController#verifyAndCreateRemoteLogDir | Major | nodemanager | Tamas Domok | Tamas Domok |
+| [HDFS-16207](https://issues.apache.org/jira/browse/HDFS-16207) | Remove NN logs stack trace for non-existent xattr query | Major | namenode | Ahmed Hussein | Ahmed Hussein |
+| [HDFS-16187](https://issues.apache.org/jira/browse/HDFS-16187) | SnapshotDiff behaviour with Xattrs and Acls is not consistent across NN restarts with checkpointing | Major | snapshots | Srinivasu Majeti | Shashikant Banerjee |
+| [HDFS-16198](https://issues.apache.org/jira/browse/HDFS-16198) | Short circuit read leaks Slot objects when InvalidToken exception is thrown | Major | . | Eungsop Yoo | Eungsop Yoo |
+| [YARN-10870](https://issues.apache.org/jira/browse/YARN-10870) | Missing user filtering check -\> yarn.webapp.filter-entity-list-by-user for RM Scheduler page | Major | yarn | Siddharth Ahuja | Gergely Pollák |
+| [HADOOP-17919](https://issues.apache.org/jira/browse/HADOOP-17919) | Fix command line example in Hadoop Cluster Setup documentation | Minor | documentation | Rintaro Ikeda | Rintaro Ikeda |
+| [HDFS-16235](https://issues.apache.org/jira/browse/HDFS-16235) | Deadlock in LeaseRenewer for static remove method | Major | hdfs | angerszhu | angerszhu |
+| [HDFS-16181](https://issues.apache.org/jira/browse/HDFS-16181) | [SBN Read] Fix metric of RpcRequestCacheMissAmount can't display when tailEditLog form JN | Critical | . | wangzhaohui | wangzhaohui |
+| [HADOOP-17925](https://issues.apache.org/jira/browse/HADOOP-17925) | BUILDING.txt should not encourage to activate docs profile on building binary artifacts | Minor | documentation | Rintaro Ikeda | Masatake Iwasaki |
+| [HADOOP-16532](https://issues.apache.org/jira/browse/HADOOP-16532) | Fix TestViewFsTrash to use the correct homeDir. | Minor | test, viewfs | Steve Loughran | Xing Lin |
+| [HDFS-16268](https://issues.apache.org/jira/browse/HDFS-16268) | Balancer stuck when moving striped blocks due to NPE | Major | balancer & mover, erasure-coding | Leon Gao | Leon Gao |
+| [HDFS-7612](https://issues.apache.org/jira/browse/HDFS-7612) | TestOfflineEditsViewer.testStored() uses incorrect default value for cacheDir | Major | test | Konstantin Shvachko | Michael Kuchenbecker |
+| [HDFS-16311](https://issues.apache.org/jira/browse/HDFS-16311) | Metric metadataOperationRate calculation error in DataNodeVolumeMetrics | Major | . | Tao Li | Tao Li |
+| [HDFS-16182](https://issues.apache.org/jira/browse/HDFS-16182) | numOfReplicas is given the wrong value in BlockPlacementPolicyDefault$chooseTarget can cause DataStreamer to fail with Heterogeneous Storage | Major | namanode | Max Xie | Max Xie |
+| [HADOOP-17999](https://issues.apache.org/jira/browse/HADOOP-17999) | No-op implementation of setWriteChecksum and setVerifyChecksum in ViewFileSystem | Major | . | Abhishek Das | Abhishek Das |
+| [HDFS-16329](https://issues.apache.org/jira/browse/HDFS-16329) | Fix log format for BlockManager | Minor | . | Tao Li | Tao Li |
+| [HDFS-16330](https://issues.apache.org/jira/browse/HDFS-16330) | Fix incorrect placeholder for Exception logs in DiskBalancer | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16328](https://issues.apache.org/jira/browse/HDFS-16328) | Correct disk balancer param desc | Minor | documentation, hdfs | guophilipse | guophilipse |
+| [HDFS-16343](https://issues.apache.org/jira/browse/HDFS-16343) | Add some debug logs when the dfsUsed are not used during Datanode startup | Major | datanode | Mukul Kumar Singh | Mukul Kumar Singh |
+| [YARN-10991](https://issues.apache.org/jira/browse/YARN-10991) | Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method | Minor | distributed-shell | Ashutosh Gupta | Ashutosh Gupta |
+| [HADOOP-17975](https://issues.apache.org/jira/browse/HADOOP-17975) | Fallback to simple auth does not work for a secondary DistributedFileSystem instance | Major | ipc | István Fajth | István Fajth |
+| [HDFS-16350](https://issues.apache.org/jira/browse/HDFS-16350) | Datanode start time should be set after RPC server starts successfully | Minor | . | Viraj Jasani | Viraj Jasani |
+| [YARN-11007](https://issues.apache.org/jira/browse/YARN-11007) | Correct words in YARN documents | Minor | documentation | guophilipse | guophilipse |
+| [HDFS-16332](https://issues.apache.org/jira/browse/HDFS-16332) | Expired block token causes slow read due to missing handling in sasl handshake | Major | datanode, dfs, dfsclient | Shinya Yoshida | Shinya Yoshida |
+| [YARN-9063](https://issues.apache.org/jira/browse/YARN-9063) | ATS 1.5 fails to start if RollingLevelDb files are corrupt or missing | Major | timelineserver, timelineservice | Tarun Parimi | Ashutosh Gupta |
+| [HDFS-16333](https://issues.apache.org/jira/browse/HDFS-16333) | fix balancer bug when transfer an EC block | Major | balancer & mover, erasure-coding | qinyuren | qinyuren |
+| [HDFS-16373](https://issues.apache.org/jira/browse/HDFS-16373) | Fix MiniDFSCluster restart in case of multiple namenodes | Major | . | Ayush Saxena | Ayush Saxena |
+| [HDFS-16377](https://issues.apache.org/jira/browse/HDFS-16377) | Should CheckNotNull before access FsDatasetSpi | Major | . | Tao Li | Tao Li |
+| [YARN-6862](https://issues.apache.org/jira/browse/YARN-6862) | Nodemanager resource usage metrics sometimes are negative | Major | nodemanager | YunFan Zhou | Benjamin Teke |
+| [YARN-10178](https://issues.apache.org/jira/browse/YARN-10178) | Global Scheduler async thread crash caused by 'Comparison method violates its general contract | Major | capacity scheduler | tuyu | Andras Gyori |
+| [HDFS-16395](https://issues.apache.org/jira/browse/HDFS-16395) | Remove useless NNThroughputBenchmark#dummyActionNoSynch() | Major | benchmarks, namenode | JiangHua Zhu | JiangHua Zhu |
+| [HADOOP-18063](https://issues.apache.org/jira/browse/HADOOP-18063) | Remove unused import AbstractJavaKeyStoreProvider in Shell class | Minor | . | JiangHua Zhu | JiangHua Zhu |
+| [HDFS-16409](https://issues.apache.org/jira/browse/HDFS-16409) | Fix typo: testHasExeceptionsReturnsCorrectValue -\> testHasExceptionsReturnsCorrectValue | Trivial | . | Ashutosh Gupta | Ashutosh Gupta |
+| [HDFS-16408](https://issues.apache.org/jira/browse/HDFS-16408) | Ensure LeaseRecheckIntervalMs is greater than zero | Major | namenode | Jingxuan Fu | Jingxuan Fu |
+| [YARN-11055](https://issues.apache.org/jira/browse/YARN-11055) | In cgroups-operations.c some fprintf format strings don't end with "\\n" | Minor | nodemanager | Gera Shegalov | Gera Shegalov |
+| [HDFS-16303](https://issues.apache.org/jira/browse/HDFS-16303) | Losing over 100 datanodes in state decommissioning results in full blockage of all datanode decommissioning | Major | . | Kevin Wikant | Kevin Wikant |
+| [HDFS-16443](https://issues.apache.org/jira/browse/HDFS-16443) | Fix edge case where DatanodeAdminDefaultMonitor doubly enqueues a DatanodeDescriptor on exception | Major | hdfs | Kevin Wikant | Kevin Wikant |
+| [HDFS-16449](https://issues.apache.org/jira/browse/HDFS-16449) | Fix hadoop web site release notes and changelog not available | Minor | documentation | guophilipse | guophilipse |
+| [HADOOP-18192](https://issues.apache.org/jira/browse/HADOOP-18192) | Fix multiple\_bindings warning about slf4j-reload4j | Major | . | Masatake Iwasaki | Masatake Iwasaki |
+| [HDFS-16479](https://issues.apache.org/jira/browse/HDFS-16479) | EC: NameNode should not send a reconstruction work when the source datanodes are insufficient | Critical | ec, erasure-coding | Yuanbo Liu | Takanobu Asanuma |
+| [HDFS-16509](https://issues.apache.org/jira/browse/HDFS-16509) | Fix decommission UnsupportedOperationException: Remove unsupported | Major | namenode | daimin | daimin |
+| [HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) | EC: Decommission a rack with only on dn will fail when the rack number is equal with replication | Critical | ec, namenode | caozhiqiang | caozhiqiang |
+| [HDFS-16437](https://issues.apache.org/jira/browse/HDFS-16437) | ReverseXML processor doesn't accept XML files without the SnapshotDiffSection. | Critical | hdfs | yanbin.zhang | yanbin.zhang |
+| [HDFS-16507](https://issues.apache.org/jira/browse/HDFS-16507) | [SBN read] Avoid purging edit log which is in progress | Critical | . | Tao Li | Tao Li |
+| [YARN-10720](https://issues.apache.org/jira/browse/YARN-10720) | YARN WebAppProxyServlet should support connection timeout to prevent proxy server from hanging | Critical | . | Qi Zhu | Qi Zhu |
+| [HDFS-16428](https://issues.apache.org/jira/browse/HDFS-16428) | Source path with storagePolicy cause wrong typeConsumed while rename | Major | hdfs, namenode | lei w | lei w |
+| [YARN-11014](https://issues.apache.org/jira/browse/YARN-11014) | YARN incorrectly validates maximum capacity resources on the validation API | Major | . | Benjamin Teke | Benjamin Teke |
+| [YARN-11075](https://issues.apache.org/jira/browse/YARN-11075) | Explicitly declare serialVersionUID in LogMutation class | Major | . | Benjamin Teke | Benjamin Teke |
+| [HDFS-11041](https://issues.apache.org/jira/browse/HDFS-11041) | Unable to unregister FsDatasetState MBean if DataNode is shutdown twice | Trivial | datanode | Wei-Chiu Chuang | Wei-Chiu Chuang |
+| [HDFS-16538](https://issues.apache.org/jira/browse/HDFS-16538) | EC decoding failed due to not enough valid inputs | Major | erasure-coding | qinyuren | qinyuren |
+| [HDFS-16544](https://issues.apache.org/jira/browse/HDFS-16544) | EC decoding failed due to invalid buffer | Major | erasure-coding | qinyuren | qinyuren |
+| [HDFS-16546](https://issues.apache.org/jira/browse/HDFS-16546) | Fix UT TestOfflineImageViewer#testReverseXmlWithoutSnapshotDiffSection to branch branch-3.2 | Major | test | daimin | daimin |
+| [HDFS-16552](https://issues.apache.org/jira/browse/HDFS-16552) | Fix NPE for TestBlockManager | Major | . | Tao Li | Tao Li |
+| [MAPREDUCE-7246](https://issues.apache.org/jira/browse/MAPREDUCE-7246) | In MapredAppMasterRest#Mapreduce\_Application\_Master\_Info\_API, the datatype of appId should be "string". | Major | documentation | jenny | Ashutosh Gupta |
+| [YARN-10187](https://issues.apache.org/jira/browse/YARN-10187) | Removing hadoop-yarn-project/hadoop-yarn/README as it is no longer maintained. | Minor | documentation | N Sanketh Reddy | Ashutosh Gupta |
+| [HDFS-16185](https://issues.apache.org/jira/browse/HDFS-16185) | Fix comment in LowRedundancyBlocks.java | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
+| [HADOOP-17479](https://issues.apache.org/jira/browse/HADOOP-17479) | Fix the examples of hadoop config prefix | Minor | documentation | Akira Ajisaka | Ashutosh Gupta |
+| [HDFS-16579](https://issues.apache.org/jira/browse/HDFS-16579) | Fix build failure for TestBlockManager on branch-3.2 | Major | . | Tao Li | Tao Li |
+| [YARN-11092](https://issues.apache.org/jira/browse/YARN-11092) | Upgrade jquery ui to 1.13.1 | Major | . | D M Murali Krishna Reddy | Ashutosh Gupta |
+| [YARN-11133](https://issues.apache.org/jira/browse/YARN-11133) | YarnClient gets the wrong EffectiveMinCapacity value | Major | api | Zilong Zhu | Zilong Zhu |
+| [YARN-10850](https://issues.apache.org/jira/browse/YARN-10850) | TimelineService v2 lists containers for all attempts when filtering for one | Major | timelinereader | Benjamin Teke | Benjamin Teke |
+| [YARN-11126](https://issues.apache.org/jira/browse/YARN-11126) | ZKConfigurationStore Java deserialisation vulnerability | Major | yarn | Tamas Domok | Tamas Domok |
+| [YARN-11162](https://issues.apache.org/jira/browse/YARN-11162) | Set the zk acl for nodes created by ZKConfigurationStore. | Major | resourcemanager | Owen O'Malley | Owen O'Malley |
+| [HDFS-16586](https://issues.apache.org/jira/browse/HDFS-16586) | Purge FsDatasetAsyncDiskService threadgroup; it causes BPServiceActor$CommandProcessingThread IllegalThreadStateException 'fatal exception and exit' | Major | datanode | Michael Stack | Michael Stack |
+| [HADOOP-18251](https://issues.apache.org/jira/browse/HADOOP-18251) | Fix failure of extracting JIRA id from commit message in git\_jira\_fix\_version\_check.py | Minor | build | Masatake Iwasaki | Masatake Iwasaki |
+| [HDFS-16583](https://issues.apache.org/jira/browse/HDFS-16583) | DatanodeAdminDefaultMonitor can get stuck in an infinite loop | Major | . | Stephen O'Donnell | Stephen O'Donnell |
+| [HDFS-16623](https://issues.apache.org/jira/browse/HDFS-16623) | IllegalArgumentException in LifelineSender | Major | . | ZanderXu | ZanderXu |
+| [HDFS-16064](https://issues.apache.org/jira/browse/HDFS-16064) | Determine when to invalidate corrupt replicas based on number of usable replicas | Major | datanode, namenode | Kevin Wikant | Kevin Wikant |
+| [HADOOP-18100](https://issues.apache.org/jira/browse/HADOOP-18100) | Change scope of inner classes in InodeTree to make them accessible outside package | Major | . | Abhishek Das | Abhishek Das |
+| [HADOOP-18334](https://issues.apache.org/jira/browse/HADOOP-18334) | Fix create-release to address removal of GPG\_AGENT\_INFO in branch-3.2 | Major | build | Masatake Iwasaki | Masatake Iwasaki |
+
+
+### TESTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [MAPREDUCE-7342](https://issues.apache.org/jira/browse/MAPREDUCE-7342) | Stop RMService in TestClientRedirect.testRedirect() | Minor | . | Zhengxi Li | Zhengxi Li |
+| [MAPREDUCE-7311](https://issues.apache.org/jira/browse/MAPREDUCE-7311) | Fix non-idempotent test in TestTaskProgressReporter | Minor | . | Zhengxi Li | Zhengxi Li |
+| [HDFS-15862](https://issues.apache.org/jira/browse/HDFS-15862) | Make TestViewfsWithNfs3.testNfsRenameSingleNN() idempotent | Minor | nfs | Zhengxi Li | Zhengxi Li |
+
+
+### SUB-TASKS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-15457](https://issues.apache.org/jira/browse/HDFS-15457) | TestFsDatasetImpl fails intermittently | Major | hdfs | Ahmed Hussein | Ahmed Hussein |
+| [HDFS-15818](https://issues.apache.org/jira/browse/HDFS-15818) | Fix TestFsDatasetImpl.testReadLockCanBeDisabledByConfig | Minor | test | Leon Gao | Leon Gao |
+| [YARN-10503](https://issues.apache.org/jira/browse/YARN-10503) | Support queue capacity in terms of absolute resources with custom resourceType. | Critical | . | Qi Zhu | Qi Zhu |
+| [HADOOP-17126](https://issues.apache.org/jira/browse/HADOOP-17126) | implement non-guava Precondition checkNotNull | Major | . | Ahmed Hussein | Ahmed Hussein |
+| [HADOOP-17929](https://issues.apache.org/jira/browse/HADOOP-17929) | implement non-guava Precondition checkArgument | Major | . | Ahmed Hussein | Ahmed Hussein |
+| [HADOOP-17947](https://issues.apache.org/jira/browse/HADOOP-17947) | Provide alternative to Guava VisibleForTesting | Major | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-17930](https://issues.apache.org/jira/browse/HADOOP-17930) | implement non-guava Precondition checkState | Major | . | Ahmed Hussein | Ahmed Hussein |
+| [HADOOP-17374](https://issues.apache.org/jira/browse/HADOOP-17374) | AliyunOSS: support ListObjectsV2 | Major | fs/oss | wujinhu | wujinhu |
+| [HDFS-16336](https://issues.apache.org/jira/browse/HDFS-16336) | De-flake TestRollingUpgrade#testRollback | Minor | hdfs, test | Kevin Wikant | Viraj Jasani |
+| [HDFS-16171](https://issues.apache.org/jira/browse/HDFS-16171) | De-flake testDecommissionStatus | Major | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16169](https://issues.apache.org/jira/browse/HDFS-16169) | Fix TestBlockTokenWithDFSStriped#testEnd2End failure | Major | test | Hui Fei | secfree |
+| [HDFS-16484](https://issues.apache.org/jira/browse/HDFS-16484) | [SPS]: Fix an infinite loop bug in SPSPathIdProcessor thread | Major | . | qinyuren | qinyuren |
+| [HADOOP-16663](https://issues.apache.org/jira/browse/HADOOP-16663) | Backport "HADOOP-16560 [YARN] use protobuf-maven-plugin to generate protobuf classes" to all active branches | Major | . | Duo Zhang | Duo Zhang |
+| [HADOOP-16664](https://issues.apache.org/jira/browse/HADOOP-16664) | Backport "HADOOP-16561 [MAPREDUCE] use protobuf-maven-plugin to generate protobuf classes" to all active branches | Major | . | Duo Zhang | Duo Zhang |
+
+
+### OTHER:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HDFS-16298](https://issues.apache.org/jira/browse/HDFS-16298) | Improve error msg for BlockMissingException | Minor | . | Tao Li | Tao Li |
+| [HDFS-16312](https://issues.apache.org/jira/browse/HDFS-16312) | Fix typo for DataNodeVolumeMetrics and ProfilingFileIoEvents | Minor | . | Tao Li | Tao Li |
+| [HDFS-16326](https://issues.apache.org/jira/browse/HDFS-16326) | Simplify the code for DiskBalancer | Minor | . | Tao Li | Tao Li |
+| [HDFS-16339](https://issues.apache.org/jira/browse/HDFS-16339) | Show the threshold when mover threads quota is exceeded | Minor | . | Tao Li | Tao Li |
+| [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | Make GetClusterNodesRequestPBImpl thread safe | Major | client | Prabhu Joseph | SwathiChandrashekar |
+| [HADOOP-13464](https://issues.apache.org/jira/browse/HADOOP-13464) | update GSON to 2.7+ | Minor | build | Sean Busbey | Igor Dvorzhak |
+| [HADOOP-18191](https://issues.apache.org/jira/browse/HADOOP-18191) | Log retry count while handling exceptions in RetryInvocationHandler | Minor | . | Viraj Jasani | Viraj Jasani |
+| [HDFS-16551](https://issues.apache.org/jira/browse/HDFS-16551) | Backport HADOOP-17588 to 3.3 and other active old branches. | Major | . | Renukaprasad C | Renukaprasad C |
+| [HDFS-16618](https://issues.apache.org/jira/browse/HDFS-16618) | sync\_file\_range error should include more volume and file info | Minor | . | Viraj Jasani | Viraj Jasani |
+| [HADOOP-18300](https://issues.apache.org/jira/browse/HADOOP-18300) | Update Gson to 2.9.0 | Minor | build | Igor Dvorzhak | Igor Dvorzhak |
+
+
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md
new file mode 100644
index 0000000000000..fac976d655da1
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.2.4/RELEASENOTES.3.2.4.md
@@ -0,0 +1,55 @@
+
+
+# Apache Hadoop 3.2.4 Release Notes
+
+These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
+
+
+---
+
+* [YARN-10820](https://issues.apache.org/jira/browse/YARN-10820) | *Major* | **Make GetClusterNodesRequestPBImpl thread safe**
+
+Added synchronization so that the "yarn node list" command does not fail intermittently.
+
+
+---
+
+* [YARN-8234](https://issues.apache.org/jira/browse/YARN-8234) | *Critical* | **Improve RM system metrics publisher's performance by pushing events to timeline server in batch**
+
+When Timeline Service V1 or V1.5 is used, ResourceManager sends timeline events in batches if "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.enable-batch" is set to true. The default value is false. When this functionality is enabled, the maximum number of events published in one batch is configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.batch-size". The default value is 1000. The interval of publishing events can be configured by "yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.interval-seconds". By default, it is set to 60 seconds.
+
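+As a minimal sketch of the corresponding yarn-site.xml entries (property names as listed above; the batch size and interval values shown are the documented defaults), the configuration could look like this:
+
+```xml
+<!-- Illustrative only: enable batched publishing of timeline v1/v1.5 events from the RM. -->
+<property>
+  <name>yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.enable-batch</name>
+  <value>true</value>
+</property>
+<property>
+  <name>yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.batch-size</name>
+  <value>1000</value>
+</property>
+<property>
+  <name>yarn.resourcemanager.system-metrics-publisher.timeline-server-v1.interval-seconds</name>
+  <value>60</value>
+</property>
+```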
+
+---
+
+* [HADOOP-18088](https://issues.apache.org/jira/browse/HADOOP-18088) | *Major* | **Replace log4j 1.x with reload4j**
+
+log4j 1 was replaced with reload4j, which is a fork of log4j 1.2.17 with the goal of fixing pressing security issues.
+
+If you are depending on the hadoop artifacts in your build and were explicitly excluding log4j artifacts, and now want to exclude the reload4j files, you will need to update your exclusion lists:
+\<exclusion\>
+ \<groupId\>org.slf4j\</groupId\>
+ \<artifactId\>slf4j-reload4j\</artifactId\>
+\</exclusion\>
+\<exclusion\>
+ \<groupId\>ch.qos.reload4j\</groupId\>
+ \<artifactId\>reload4j\</artifactId\>
+\</exclusion\>
+
+
+
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md
new file mode 100644
index 0000000000000..78b805240c78e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/CHANGELOG.3.3.4.md
@@ -0,0 +1,56 @@
+
+
+# Apache Hadoop Changelog
+
+## Release 3.3.4 - 2022-07-29
+
+
+
+### IMPROVEMENTS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-18044](https://issues.apache.org/jira/browse/HADOOP-18044) | Hadoop - Upgrade to JQuery 3.6.0 | Major | . | Yuan Luo | Yuan Luo |
+| [YARN-11195](https://issues.apache.org/jira/browse/YARN-11195) | Document how to configure NUMA in YARN | Major | documentation | Prabhu Joseph | Samrat Deb |
+| [HADOOP-18332](https://issues.apache.org/jira/browse/HADOOP-18332) | Remove rs-api dependency by downgrading jackson to 2.12.7 | Major | build | PJ Fanning | PJ Fanning |
+| [HADOOP-18354](https://issues.apache.org/jira/browse/HADOOP-18354) | Upgrade reload4j to 1.2.22 due to XXE vulnerability | Major | . | PJ Fanning | PJ Fanning |
+
+
+### BUG FIXES:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-18085](https://issues.apache.org/jira/browse/HADOOP-18085) | S3 SDK Upgrade causes AccessPoint ARN endpoint mistranslation | Major | fs/s3, test | Bogdan Stolojan | Bogdan Stolojan |
+| [YARN-11092](https://issues.apache.org/jira/browse/YARN-11092) | Upgrade jquery ui to 1.13.1 | Major | . | D M Murali Krishna Reddy | groot |
+| [HDFS-16453](https://issues.apache.org/jira/browse/HDFS-16453) | Upgrade okhttp from 2.7.5 to 4.9.3 | Major | hdfs-client | Ivan Viaznikov | groot |
+| [YARN-10974](https://issues.apache.org/jira/browse/YARN-10974) | CS UI: queue filter and openQueues param do not work as expected | Major | capacity scheduler | Chengbing Liu | Chengbing Liu |
+| [HADOOP-18237](https://issues.apache.org/jira/browse/HADOOP-18237) | Upgrade Apache Xerces Java to 2.12.2 | Major | build | groot | groot |
+| [HADOOP-18074](https://issues.apache.org/jira/browse/HADOOP-18074) | Partial/Incomplete groups list can be returned in LDAP groups lookup | Major | security | Philippe Lanoe | Larry McCay |
+| [HADOOP-18079](https://issues.apache.org/jira/browse/HADOOP-18079) | Upgrade Netty to 4.1.77.Final | Major | build | Renukaprasad C | Wei-Chiu Chuang |
+
+
+### SUB-TASKS:
+
+| JIRA | Summary | Priority | Component | Reporter | Contributor |
+|:---- |:---- | :--- |:---- |:---- |:---- |
+| [HADOOP-18068](https://issues.apache.org/jira/browse/HADOOP-18068) | Upgrade AWS SDK to 1.12.132 | Major | build, fs/s3 | Steve Loughran | Steve Loughran |
+| [HADOOP-18307](https://issues.apache.org/jira/browse/HADOOP-18307) | remove hadoop-cos as a dependency of hadoop-cloud-storage | Major | bulid, fs | Steve Loughran | Steve Loughran |
+| [HADOOP-18344](https://issues.apache.org/jira/browse/HADOOP-18344) | AWS SDK update to 1.12.262 to address jackson CVE-2018-7489 | Major | fs/s3 | Steve Loughran | Steve Loughran |
+
+
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md
new file mode 100644
index 0000000000000..79573880423d6
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/release/3.3.4/RELEASENOTES.3.3.4.md
@@ -0,0 +1,66 @@
+
+
+# Apache Hadoop 3.3.4 Release Notes
+
+These release notes cover new developer and user-facing incompatibilities, important issues, features, and major improvements.
+
+
+---
+
+* [HDFS-16453](https://issues.apache.org/jira/browse/HDFS-16453) | *Major* | **Upgrade okhttp from 2.7.5 to 4.9.3**
+
+okhttp has been updated to address CVE-2021-0341
+
+
+---
+
+* [HADOOP-18237](https://issues.apache.org/jira/browse/HADOOP-18237) | *Major* | **Upgrade Apache Xerces Java to 2.12.2**
+
+Apache Xerces has been updated to 2.12.2 to fix CVE-2022-23437
+
+
+---
+
+* [HADOOP-18307](https://issues.apache.org/jira/browse/HADOOP-18307) | *Major* | **remove hadoop-cos as a dependency of hadoop-cloud-storage**
+
+We have recently become aware that libraries which include a shaded Apache HttpClient library (hadoop-client-runtime.jar, aws-java-sdk-bundle.jar, gcs-connector-shaded.jar, cos\_api-bundle-5.6.19.jar) all load and use the unshaded resource mozilla/public-suffix-list.txt. If an out-of-date version of this file is found on the classpath first, attempts to negotiate TLS connections may fail with the error "Certificate doesn't match any of the subject alternative names". This release does not declare the hadoop-cos library as a dependency of the hadoop-cloud-storage POM, so applications depending on that module are no longer exposed to this issue. If an application requires the hadoop-cos module, please declare an explicit dependency.
+
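+As an illustration only (assuming a Maven build; the org.apache.hadoop:hadoop-cos coordinates and the ${hadoop.version} property follow the usual convention, so adjust them to your build), an application that still needs the module could declare it explicitly:
+
+```xml
+<!-- Hypothetical explicit dependency; match the version to your Hadoop release line. -->
+<dependency>
+  <groupId>org.apache.hadoop</groupId>
+  <artifactId>hadoop-cos</artifactId>
+  <version>${hadoop.version}</version>
+</dependency>
+```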
+
+---
+
+* [HADOOP-18332](https://issues.apache.org/jira/browse/HADOOP-18332) | *Major* | **Remove rs-api dependency by downgrading jackson to 2.12.7**
+
+Downgrades Jackson from 2.13.2 to 2.12.7 to fix class conflicts in downstream projects. This version of Jackson still contains the fix for CVE-2020-36518.
+
+
+---
+
+* [HADOOP-18079](https://issues.apache.org/jira/browse/HADOOP-18079) | *Major* | **Upgrade Netty to 4.1.77.Final**
+
+Netty has been updated to address CVE-2019-20444, CVE-2019-20445 and CVE-2022-24823
+
+
+---
+
+* [HADOOP-18344](https://issues.apache.org/jira/browse/HADOOP-18344) | *Major* | **AWS SDK update to 1.12.262 to address jackson CVE-2018-7489**
+
+The AWS SDK has been updated to 1.12.262 to address jackson CVE-2018-7489
+
+
+
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
index acda898ea1342..939881f39df6d 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java
@@ -26,6 +26,7 @@
import java.util.NoSuchElementException;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.test.LambdaTestUtils;
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
import org.apache.hadoop.util.Shell;
@@ -532,4 +533,20 @@ public void testGetLocalPathForWriteForInvalidPaths() throws Exception {
}
}
+ /**
+ * Test that LocalDirAllocator reports diagnostic details when file creation fails due to insufficient space.
+ *
+ * @throws Exception
+ */
+ @Test(timeout = 30000)
+ public void testGetLocalPathForWriteForLessSpace() throws Exception {
+ String dir0 = buildBufferDir(ROOT, 0);
+ String dir1 = buildBufferDir(ROOT, 1);
+ conf.set(CONTEXT, dir0 + "," + dir1);
+ LambdaTestUtils.intercept(DiskErrorException.class,
+ String.format("Could not find any valid local directory for %s with requested size %s",
+ "p1/x", Long.MAX_VALUE - 1), "Expect a DiskErrorException.",
+ () -> dirAllocator.getLocalPathForWrite("p1/x", Long.MAX_VALUE - 1, conf));
+ }
}
+
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java
index 29ef6ca6c7afd..38e16221a4518 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java
@@ -24,7 +24,6 @@
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.LambdaTestUtils;
-import org.apache.hadoop.test.Whitebox;
import org.apache.hadoop.util.StringUtils;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT;
@@ -650,7 +649,8 @@ public void testFileStatusPipeFile() throws Exception {
RawLocalFileSystem fs = spy(origFs);
Configuration conf = mock(Configuration.class);
fs.setConf(conf);
- Whitebox.setInternalState(fs, "useDeprecatedFileStatus", false);
+
+ RawLocalFileSystem.setUseDeprecatedFileStatus(false);
Path path = new Path("/foo");
File pipe = mock(File.class);
when(pipe.isFile()).thenReturn(false);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java
index 5d08b02e113d5..ebf0e14053bba 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestVectoredReadUtils.java
@@ -35,6 +35,8 @@
import org.apache.hadoop.test.HadoopTestBase;
import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges;
+import static org.apache.hadoop.fs.VectoredReadUtils.validateNonOverlappingAndReturnSortedRanges;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.apache.hadoop.test.MoreAsserts.assertFutureCompletedSuccessfully;
import static org.apache.hadoop.test.MoreAsserts.assertFutureFailedExceptionally;
@@ -231,6 +233,36 @@ public void testSortAndMergeMoreCases() throws Exception {
}
+ @Test
+ public void testValidateOverlappingRanges() throws Exception {
+ List<FileRange> input = Arrays.asList(
+ FileRange.createFileRange(100, 100),
+ FileRange.createFileRange(200, 100),
+ FileRange.createFileRange(250, 100)
+ );
+
+ intercept(UnsupportedOperationException.class,
+ () -> validateNonOverlappingAndReturnSortedRanges(input));
+
+ List<FileRange> input1 = Arrays.asList(
+ FileRange.createFileRange(100, 100),
+ FileRange.createFileRange(500, 100),
+ FileRange.createFileRange(1000, 100),
+ FileRange.createFileRange(1000, 100)
+ );
+
+ intercept(UnsupportedOperationException.class,
+ () -> validateNonOverlappingAndReturnSortedRanges(input1));
+
+ List<FileRange> input2 = Arrays.asList(
+ FileRange.createFileRange(100, 100),
+ FileRange.createFileRange(200, 100),
+ FileRange.createFileRange(300, 100)
+ );
+ // consecutive ranges should pass.
+ validateNonOverlappingAndReturnSortedRanges(input2);
+ }
+
@Test
public void testMaxSizeZeroDisablesMering() throws Exception {
List<FileRange> randomRanges = Arrays.asList(
@@ -354,17 +386,31 @@ public void testReadVectored() throws Exception {
List<FileRange> input = Arrays.asList(FileRange.createFileRange(0, 100),
FileRange.createFileRange(100_000, 100),
FileRange.createFileRange(200_000, 100));
+ runAndValidateVectoredRead(input);
+ }
+
+ @Test
+ public void testReadVectoredZeroBytes() throws Exception {
+ List<FileRange> input = Arrays.asList(FileRange.createFileRange(0, 0),
+ FileRange.createFileRange(100_000, 100),
+ FileRange.createFileRange(200_000, 0));
+ runAndValidateVectoredRead(input);
+ }
+
+
+ private void runAndValidateVectoredRead(List<FileRange> input)
+ throws Exception {
Stream stream = Mockito.mock(Stream.class);
Mockito.doAnswer(invocation -> {
fillBuffer(invocation.getArgument(1));
return null;
}).when(stream).readFully(ArgumentMatchers.anyLong(),
- ArgumentMatchers.any(ByteBuffer.class));
+ ArgumentMatchers.any(ByteBuffer.class));
// should not merge the ranges
VectoredReadUtils.readVectored(stream, input, ByteBuffer::allocate);
Mockito.verify(stream, Mockito.times(3))
- .readFully(ArgumentMatchers.anyLong(), ArgumentMatchers.any(ByteBuffer.class));
- for(int b=0; b < input.size(); ++b) {
+ .readFully(ArgumentMatchers.anyLong(), ArgumentMatchers.any(ByteBuffer.class));
+ for (int b = 0; b < input.size(); ++b) {
validateBuffer("buffer " + b, input.get(b).getData().get(), 0);
}
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
index 77bcc496ff4a2..86b645b9ec9c5 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java
@@ -24,11 +24,10 @@
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
import java.util.function.IntFunction;
import org.assertj.core.api.Assertions;
-import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -43,13 +42,14 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.impl.FutureIOSupport;
import org.apache.hadoop.io.WeakReferencedElasticByteBufferPool;
-import org.apache.hadoop.test.LambdaTestUtils;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertCapabilities;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertDatasetEquals;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.returnBuffersToPoolPostRead;
import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture;
@RunWith(Parameterized.class)
public abstract class AbstractContractVectoredReadTest extends AbstractFSContractTestBase {
@@ -84,6 +84,10 @@ public IntFunction getAllocate() {
return allocate;
}
+ public WeakReferencedElasticByteBufferPool getPool() {
+ return pool;
+ }
+
@Override
public void setup() throws Exception {
super.setup();
@@ -268,6 +272,11 @@ public void testConsecutiveRanges() throws Exception {
}
}
+ /**
+ * Test to validate EOF ranges. The default implementation fails with an EOFException
+ * while reading the ranges. Some implementations, such as S3 and checksum file systems,
+ * fail fast because they already know the file length.
+ */
@Test
public void testEOFRanges() throws Exception {
FileSystem fs = getFileSystem();
@@ -277,16 +286,11 @@ public void testEOFRanges() throws Exception {
in.readVectored(fileRanges, allocate);
for (FileRange res : fileRanges) {
CompletableFuture<ByteBuffer> data = res.getData();
- try {
- ByteBuffer buffer = data.get();
- // Shouldn't reach here.
- Assert.fail("EOFException must be thrown while reading EOF");
- } catch (ExecutionException ex) {
- // ignore as expected.
- } catch (Exception ex) {
- LOG.error("Exception while running vectored read ", ex);
- Assert.fail("Exception while running vectored read " + ex);
- }
+ interceptFuture(EOFException.class,
+ "",
+ ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS,
+ TimeUnit.SECONDS,
+ data);
}
}
}
@@ -382,6 +386,13 @@ protected List getSampleOverlappingRanges() {
return fileRanges;
}
+ protected List<FileRange> getConsecutiveRanges() {
+ List<FileRange> fileRanges = new ArrayList<>();
+ fileRanges.add(FileRange.createFileRange(100, 500));
+ fileRanges.add(FileRange.createFileRange(600, 500));
+ return fileRanges;
+ }
+
/**
* Validate that exceptions must be thrown during a vectored
* read operation with specific input ranges.
@@ -399,7 +410,7 @@ protected void verifyExceptionalVectoredRead(
fs.openFile(path(VECTORED_READ_FILE_NAME))
.build();
try (FSDataInputStream in = builder.get()) {
- LambdaTestUtils.intercept(clazz,
+ intercept(clazz,
() -> in.readVectored(fileRanges, allocate));
}
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java
index 5d6ca3f8f0c90..5ee888015315c 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractVectoredRead.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.fs.contract.localfs;
+import java.io.EOFException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;
@@ -30,6 +31,7 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileRange;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractContractVectoredReadTest;
@@ -52,9 +54,33 @@ protected AbstractFSContract createContract(Configuration conf) {
@Test
public void testChecksumValidationDuringVectoredRead() throws Exception {
- Path testPath = path("big_range_checksum");
+ Path testPath = path("big_range_checksum_file");
+ List<FileRange> someRandomRanges = new ArrayList<>();
+ someRandomRanges.add(FileRange.createFileRange(10, 1024));
+ someRandomRanges.add(FileRange.createFileRange(1025, 1024));
+ validateCheckReadException(testPath, DATASET_LEN, someRandomRanges);
+ }
+
+
+ /**
+ * Test for a file whose size is less than the checksum chunk size
+ * ({@code ChecksumFileSystem#bytesPerChecksum}).
+ */
+ @Test
+ public void testChecksumValidationDuringVectoredReadSmallFile() throws Exception {
+ Path testPath = path("big_range_checksum_file");
+ final int length = 471;
+ List<FileRange> smallFileRanges = new ArrayList<>();
+ smallFileRanges.add(FileRange.createFileRange(10, 50));
+ smallFileRanges.add(FileRange.createFileRange(100, 20));
+ validateCheckReadException(testPath, length, smallFileRanges);
+ }
+
+ private void validateCheckReadException(Path testPath,
+ int length,
+ List<FileRange> ranges) throws Exception {
LocalFileSystem localFs = (LocalFileSystem) getFileSystem();
- final byte[] datasetCorrect = ContractTestUtils.dataset(DATASET_LEN, 'a', 32);
+ final byte[] datasetCorrect = ContractTestUtils.dataset(length, 'a', 32);
try (FSDataOutputStream out = localFs.create(testPath, true)){
out.write(datasetCorrect);
}
@@ -63,24 +89,55 @@ public void testChecksumValidationDuringVectoredRead() throws Exception {
.describedAs("Checksum file should be present")
.isTrue();
CompletableFuture<FSDataInputStream> fis = localFs.openFile(testPath).build();
- List someRandomRanges = new ArrayList<>();
- someRandomRanges.add(FileRange.createFileRange(10, 1024));
- someRandomRanges.add(FileRange.createFileRange(1025, 1024));
try (FSDataInputStream in = fis.get()){
- in.readVectored(someRandomRanges, getAllocate());
- validateVectoredReadResult(someRandomRanges, datasetCorrect);
+ in.readVectored(ranges, getAllocate());
+ validateVectoredReadResult(ranges, datasetCorrect);
}
- final byte[] datasetCorrupted = ContractTestUtils.dataset(DATASET_LEN, 'a', 64);
+ final byte[] datasetCorrupted = ContractTestUtils.dataset(length, 'a', 64);
try (FSDataOutputStream out = localFs.getRaw().create(testPath, true)){
out.write(datasetCorrupted);
}
CompletableFuture<FSDataInputStream> fisN = localFs.openFile(testPath).build();
try (FSDataInputStream in = fisN.get()){
- in.readVectored(someRandomRanges, getAllocate());
+ in.readVectored(ranges, getAllocate());
// Expect checksum exception when data is updated directly through
// raw local fs instance.
intercept(ChecksumException.class,
- () -> validateVectoredReadResult(someRandomRanges, datasetCorrupted));
+ () -> validateVectoredReadResult(ranges, datasetCorrupted));
+ }
+ }
+ @Test
+ public void testChecksumVectoredReadBoundaries() throws Exception {
+ Path testPath = path("boundary_range_checksum_file");
+ final int length = 1071;
+ LocalFileSystem localFs = (LocalFileSystem) getFileSystem();
+ final byte[] datasetCorrect = ContractTestUtils.dataset(length, 'a', 32);
+ try (FSDataOutputStream out = localFs.create(testPath, true)){
+ out.write(datasetCorrect);
+ }
+ Path checksumPath = localFs.getChecksumFile(testPath);
+ Assertions.assertThat(localFs.exists(checksumPath))
+ .describedAs("Checksum file should be present at {} ", checksumPath)
+ .isTrue();
+ CompletableFuture<FSDataInputStream> fis = localFs.openFile(testPath).build();
+ List<FileRange> smallRange = new ArrayList<>();
+ smallRange.add(FileRange.createFileRange(1000, 71));
+ try (FSDataInputStream in = fis.get()){
+ in.readVectored(smallRange, getAllocate());
+ validateVectoredReadResult(smallRange, datasetCorrect);
}
}
+
+
+ /**
+ * Overridden for the checksum fs because its vectored read API fails fast
+ * when a requested range starts at or beyond EOF.
+ */
+ @Override
+ public void testEOFRanges() throws Exception {
+ FileSystem fs = getFileSystem();
+ List<FileRange> fileRanges = new ArrayList<>();
+ fileRanges.add(FileRange.createFileRange(DATASET_LEN, 100));
+ verifyExceptionalVectoredRead(fs, fileRanges, EOFException.class);
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java
new file mode 100644
index 0000000000000..82348d97798ea
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/ExceptionAsserts.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+
+public final class ExceptionAsserts {
+
+ private ExceptionAsserts() {
+ }
+
+
+ /**
+ * Asserts that the given code throws an exception of the given type
+ * and that the exception message contains the given sub-message.
+ *
+ * Usage:
+ *
+ * ExceptionAsserts.assertThrows(
+ * IllegalArgumentException.class,
+ * "'nullArg' must not be null",
+ * () -> Preconditions.checkNotNull(null, "nullArg"));
+ *
+ * Note: JUnit 5 has similar functionality but it will be a long time before
+ * we move to that framework because of significant differences and lack of
+ * backward compatibility for some JUnit rules.
+ */
+ public static <E extends Exception> void assertThrows(
+ Class<E> expectedExceptionClass,
+ String partialMessage,
+ LambdaTestUtils.VoidCallable code) throws Exception {
+
+ intercept(expectedExceptionClass, partialMessage, code);
+
+ }
+
+ public static <E extends Exception> void assertThrows(
+ Class<E> expectedExceptionClass,
+ LambdaTestUtils.VoidCallable code) throws Exception {
+
+ intercept(expectedExceptionClass, code);
+
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java
new file mode 100644
index 0000000000000..b6f744582d3e2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/prefetch/SampleDataForTests.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Frequently used test data items.
+ */
+public final class SampleDataForTests {
+
+ private SampleDataForTests() {
+ }
+
+
+ // Array data.
+ public static final Object[] NULL_ARRAY = null;
+
+ public static final Object[] EMPTY_ARRAY = new Object[0];
+
+ public static final Object[] NON_EMPTY_ARRAY = new Object[1];
+
+ public static final byte[] NULL_BYTE_ARRAY = null;
+
+ public static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
+
+ public static final byte[] NON_EMPTY_BYTE_ARRAY = new byte[1];
+
+ public static final short[] NULL_SHORT_ARRAY = null;
+
+ public static final short[] EMPTY_SHORT_ARRAY = new short[0];
+
+ public static final short[] NON_EMPTY_SHORT_ARRAY = new short[1];
+
+ public static final int[] NULL_INT_ARRAY = null;
+
+ public static final int[] EMPTY_INT_ARRAY = new int[0];
+
+ public static final int[] NON_EMPTY_INT_ARRAY = new int[1];
+
+ public static final long[] NULL_LONG_ARRAY = null;
+
+ public static final long[] EMPTY_LONG_ARRAY = new long[0];
+
+ public static final long[] NON_EMPTY_LONG_ARRAY = new long[1];
+
+ public static final List